teradataml-20.0.0.0-py3-none-any.whl → teradataml-20.0.0.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Note: this version of teradataml has been flagged as a potentially problematic release.

Files changed (108)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +71 -0
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +51 -24
  6. teradataml/analytics/json_parser/utils.py +11 -17
  7. teradataml/automl/__init__.py +103 -48
  8. teradataml/automl/data_preparation.py +55 -37
  9. teradataml/automl/data_transformation.py +131 -69
  10. teradataml/automl/feature_engineering.py +117 -185
  11. teradataml/automl/feature_exploration.py +9 -2
  12. teradataml/automl/model_evaluation.py +13 -25
  13. teradataml/automl/model_training.py +214 -75
  14. teradataml/catalog/model_cataloging_utils.py +1 -1
  15. teradataml/clients/auth_client.py +133 -0
  16. teradataml/common/aed_utils.py +3 -2
  17. teradataml/common/constants.py +11 -6
  18. teradataml/common/garbagecollector.py +5 -0
  19. teradataml/common/messagecodes.py +3 -1
  20. teradataml/common/messages.py +2 -1
  21. teradataml/common/utils.py +6 -0
  22. teradataml/context/context.py +49 -29
  23. teradataml/data/advertising.csv +201 -0
  24. teradataml/data/bank_marketing.csv +11163 -0
  25. teradataml/data/bike_sharing.csv +732 -0
  26. teradataml/data/boston2cols.csv +721 -0
  27. teradataml/data/breast_cancer.csv +570 -0
  28. teradataml/data/customer_segmentation_test.csv +2628 -0
  29. teradataml/data/customer_segmentation_train.csv +8069 -0
  30. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  31. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  32. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  33. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  34. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  35. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  36. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  37. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  38. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  39. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  40. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  41. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  42. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  43. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  44. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  45. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  46. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  47. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  48. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  49. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  50. teradataml/data/glm_example.json +28 -1
  51. teradataml/data/housing_train_segment.csv +201 -0
  52. teradataml/data/insect2Cols.csv +61 -0
  53. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  54. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  55. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  56. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  57. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  58. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  59. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  60. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  61. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  62. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  63. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  64. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  65. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  66. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  67. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  68. teradataml/data/kmeans_example.json +5 -0
  69. teradataml/data/kmeans_table.csv +10 -0
  70. teradataml/data/onehot_encoder_train.csv +4 -0
  71. teradataml/data/openml_example.json +29 -0
  72. teradataml/data/scale_attributes.csv +3 -0
  73. teradataml/data/scale_example.json +52 -1
  74. teradataml/data/scale_input_part_sparse.csv +31 -0
  75. teradataml/data/scale_input_partitioned.csv +16 -0
  76. teradataml/data/scale_input_sparse.csv +11 -0
  77. teradataml/data/scale_parameters.csv +3 -0
  78. teradataml/data/scripts/deploy_script.py +20 -1
  79. teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
  80. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
  81. teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
  82. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  83. teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
  84. teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
  85. teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
  86. teradataml/data/teradataml_example.json +77 -0
  87. teradataml/data/ztest_example.json +16 -0
  88. teradataml/dataframe/copy_to.py +8 -3
  89. teradataml/dataframe/data_transfer.py +120 -61
  90. teradataml/dataframe/dataframe.py +102 -17
  91. teradataml/dataframe/dataframe_utils.py +47 -9
  92. teradataml/dataframe/fastload.py +272 -89
  93. teradataml/dataframe/sql.py +84 -0
  94. teradataml/dbutils/dbutils.py +2 -2
  95. teradataml/lib/aed_0_1.dll +0 -0
  96. teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
  97. teradataml/options/__init__.py +13 -4
  98. teradataml/options/configure.py +27 -6
  99. teradataml/scriptmgmt/UserEnv.py +19 -16
  100. teradataml/scriptmgmt/lls_utils.py +117 -14
  101. teradataml/table_operators/Script.py +2 -3
  102. teradataml/table_operators/TableOperator.py +58 -10
  103. teradataml/utils/validators.py +40 -2
  104. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
  105. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
  106. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
  107. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
  108. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
teradataml/automl/data_transformation.py

@@ -31,6 +31,8 @@ from teradataml import ScaleTransform
 from teradataml import SimpleImputeTransform
 from teradataml import TargetEncodingTransform
 from teradataml import Transform, UtilFuncs, TeradataConstants
+from teradataml.common.garbagecollector import GarbageCollector
+from teradataml.hyperparameter_tuner.utils import _ProgressBar
 
 # AutoML Internal libraries
 from teradataml.automl.feature_exploration import _FeatureExplore
@@ -58,12 +60,12 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             Types: teradataml Dataframe
 
         data_transformation_params:
-            Required Arugment.
+            Required Argument.
             Specifies the parameters for performing data transformation.
             Types: dict
 
         auto:
-            Optional Arugment.
+            Optional Argument.
             Specifies whether to run AutoML in custom mode or auto mode.
             When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
             Default Value: True
@@ -80,7 +82,7 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             Types: int
 
         target_column_ind:
-            Optional Arugment.
+            Optional Argument.
             Specifies whether target column is present in given dataset.
             Default Value: False
             Types: bool
@@ -118,6 +120,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         # Extracting target column details and type whether it is classification or not
         self.data_target_column = self.data_transformation_params.get("data_target_column")
         self.classification_type = self.data_transformation_params.get("classification_type", False)
+
+        # Setting number of jobs for progress bar based on mode of execution
+        jobs = 10 if self.auto else 15
+        self.progress_bar = _ProgressBar(jobs=jobs, verbose=2, prefix='Transformation Running:')
+
         # Performing transformation carried out in feature engineering phase
         self.feature_engineering_transformation()
         # Performing transformation carried out in data preparation phase
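The job counts above line up with the update() calls threaded through the methods that follow: auto mode ticks through 10 tracked steps and custom mode through 15. A minimal, runnable sketch of the pattern in plain Python (this is not the teradataml _ProgressBar implementation; only the jobs/prefix arguments and the update() calls are taken from this diff):

import sys

class MiniProgressBar:
    """Toy stand-in for a jobs-based progress bar."""
    def __init__(self, jobs, prefix=""):
        self.jobs = jobs      # total number of update() calls expected
        self.done = 0
        self.prefix = prefix

    def update(self):
        # Advance one step and redraw the status line in place.
        self.done += 1
        pct = 100 * self.done // self.jobs
        sys.stdout.write("\r{} {}/{} ({}%)".format(self.prefix, self.done, self.jobs, pct))
        sys.stdout.flush()
        if self.done == self.jobs:
            sys.stdout.write("\n")

# Auto mode tracks 10 steps, custom mode 15, matching the update() calls
# added to the transformation methods in the hunks below.
bar = MiniProgressBar(jobs=10, prefix="Transformation Running:")
for _ in range(10):
    bar.update()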
@@ -133,27 +140,52 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         on test data using parameters from data_transformation_params.
         """
         self._display_msg(msg="Performing transformation carried out in feature engineering phase ...",
-                          show_data=True)
+                          show_data=True,
+                          progress_bar=self.progress_bar)
+
         # Performing default transformation for both auto and custom mode
         self._preprocess_transformation()
+        self.progress_bar.update()
+
         self._futile_column_handling_transformation()
+        self.progress_bar.update()
+
         # Handling target column transformation
         if self.target_column_ind and self.classification_type:
             self._handle_target_column_transformation()
+            self.progress_bar.update()
+
         self._date_column_handling_transformation()
+        self.progress_bar.update()
 
         # Performing transformation according to run mode
         if self.auto:
             self._missing_value_handling_transformation()
+            self.progress_bar.update()
+
             self._categorical_encoding_transformation()
+            self.progress_bar.update()
         else:
             self._custom_missing_value_handling_transformation()
+            self.progress_bar.update()
+
             self._custom_bincode_column_transformation()
+            self.progress_bar.update()
+
             self._custom_string_column_transformation()
+            self.progress_bar.update()
+
             self._custom_categorical_encoding_transformation()
+            self.progress_bar.update()
+
             self._custom_mathematical_transformation()
+            self.progress_bar.update()
+
             self._custom_non_linear_transformation()
+            self.progress_bar.update()
+
             self._custom_anti_select_column_transformation()
+            self.progress_bar.update()
 
     def data_preparation_transformation(self):
         """
@@ -162,15 +194,23 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         on test data using parameters from data_transformation_params.
         """
         self._display_msg(msg="Performing transformation carried out in data preparation phase ...",
-                          show_data=True)
+                          show_data=True,
+                          progress_bar=self.progress_bar)
+
         # Handling features transformed from feature engineering phase
         self._handle_generated_features_transformation()
+        self.progress_bar.update()
 
         # Performing transformation including feature selection using lasso, rfe and pca
         # followed by scaling
         self._feature_selection_lasso_transformation()
+        self.progress_bar.update()
+
         self._feature_selection_rfe_transformation()
+        self.progress_bar.update()
+
         self._feature_selection_pca_transformation()
+        self.progress_bar.update()
 
     def _preprocess_transformation(self):
         """
@@ -182,7 +222,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         if columns_to_be_removed:
             self.data = self.data.drop(columns_to_be_removed, axis=1)
             self._display_msg(msg="\nUpdated dataset after dropping irrelevent columns :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
         # Adding id column
         self.data = FillRowId(data=self.data, row_id_column='id').result
@@ -197,7 +238,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         if futile_cols:
             self.data = self.data.drop(futile_cols, axis=1)
             self._display_msg(msg="\nUpdated dataset after dropping futile columns :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _date_column_handling_transformation(self):
         """
@@ -205,47 +247,32 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         Function performs transformation on date columns and generates new columns.
         """
         # Extracting date columns
-        date_columns = self.data_transformation_params.get("date_columns",None)
-        if date_columns:
+        self.date_column_list = self.data_transformation_params.get("date_columns",None)
+        if self.date_column_list:
             # Dropping rows with null values in date columns
-            self.data = self.data.dropna(subset=date_columns)
+            self.data = self.data.dropna(subset=self.date_column_list)
             # Extracting unique date columns for dropping
             drop_unique_date_columns = self.data_transformation_params.get("drop_unique_date_columns",None)
             if drop_unique_date_columns:
                 self.data = self.data.drop(drop_unique_date_columns, axis=1)
+                # Updated date column list after dropping irrelevant date columns
+                self.date_column_list = [item for item in self.date_column_list if item not in drop_unique_date_columns]
 
-            # Extracting date components parameters for new columns generation
-            extract_date_comp_param = self.data_transformation_params.get("extract_date_comp_param",None)
-            extract_date_comp_col = self.data_transformation_params.get("extract_date_comp_col", None)
-            if extract_date_comp_param:
-                self.data=self.data.assign(**extract_date_comp_param)
-                self.data = self.data.drop(extract_date_comp_col, axis=1)
-
-            # Extracting irrelevant date component columns for dropping
-            drop_extract_date_columns = self.data_transformation_params.get("drop_extract_date_columns", None)
-            if drop_extract_date_columns:
-                self.data = self.data.drop(drop_extract_date_columns, axis=1)
-
-            # Extracting date component fit objects for bincode transformation
-            day_component_fit_object = self.data_transformation_params.get("day_component_fit_object", None)
-            month_component_fit_object = self.data_transformation_params.get("month_component_fit_object", None)
-            year_diff_component_fit_object = self.data_transformation_params.get("year_diff_component_fit_object", None)
-
-            # Performing bincode transformation on day, month and year components
-            for fit_object in [day_component_fit_object, month_component_fit_object, year_diff_component_fit_object]:
-                if fit_object:
-                    for col, bin_code_fit in fit_object.items():
-                        accumulate_columns = self._extract_list(self.data.columns, [col])
-                        transform_params = {
-                            "data": self.data,
-                            "object": bin_code_fit,
-                            "accumulate": accumulate_columns,
-                            "persist": True
-                        }
-                        self.data = BincodeTransform(**transform_params).result
-
-            self._display_msg(msg="\nUpdated dataset after transforming date columns :",
-                              data=self.data)
+            if len(self.date_column_list) != 0:
+                # Extracting date components parameters for new columns generation
+                new_columns=self._fetch_date_component()
+
+                # Extracting irrelevant date component columns for dropping
+                drop_extract_date_columns = self.data_transformation_params.get("drop_extract_date_columns", None)
+                if drop_extract_date_columns:
+                    self.data = self.data.drop(drop_extract_date_columns, axis=1)
+                    new_columns = [item for item in new_columns if item not in drop_extract_date_columns]
+
+                self._display_msg(msg='Updated list of newly generated features from existing date features :',
+                                  col_lst=new_columns)
+                self._display_msg(msg="\nUpdated dataset after transforming date columns :",
+                                  data=self.data,
+                                  progress_bar=self.progress_bar)
 
     def _missing_value_handling_transformation(self):
         """
@@ -257,7 +284,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         if drop_cols:
             self.data = self.data.drop(drop_cols, axis=1)
             self._display_msg(msg="\nUpdated dataset after dropping missing value containing columns : ",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
         # Extracting imputation columns and fit object for missing value imputation
         imputation_cols = self.data_transformation_params.get("imputation_columns", None)
@@ -265,20 +293,22 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             sm_fit_obj = self.data_transformation_params.get("imputation_fit_object")
             # imputing column using fit object
             self.data = SimpleImputeTransform(data=self.data,
-                                              object=sm_fit_obj,
-                                              volatile=True).result
+                                              object=sm_fit_obj).result
             self._display_msg(msg="\nUpdated dataset after imputing missing value containing columns :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
         # Handling rest null, its temporary solution. It subjects to change based on input.
         dropped_data = self.data.dropna()
         dropped_count = self.data.shape[0] - dropped_data.shape[0]
         if dropped_count > 0:
-            self.data = dropped_data
             self._display_msg(msg="\nFound additional {} rows that contain missing values :".format(dropped_count),
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
+            self.data = dropped_data
             self._display_msg(msg="\nUpdated dataset after dropping additional missing value containing rows :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _custom_missing_value_handling_transformation(self):
         """
@@ -291,7 +321,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         if drop_col_list:
             self.data = self.data.drop(drop_col_list, axis=1)
             self._display_msg(msg="\nUpdated dataset after dropping customized missing value containing columns :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
         # Extracting custom imputation columns and fit object for missing value imputation
         custom_imp_ind = self.data_transformation_params.get("custom_imputation_ind", False)
@@ -299,10 +330,10 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             sm_fit_obj = self.data_transformation_params.get("custom_imputation_fit_object")
             # imputing column using fit object
             self.data = SimpleImputeTransform(data=self.data,
-                                              object=sm_fit_obj,
-                                              volatile=True).result
+                                              object=sm_fit_obj).result
             self._display_msg(msg="\nUpdated dataset after imputing customized missing value containing columns :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
         # Handling rest with default missing value handling
         self._missing_value_handling_transformation()
 
@@ -328,8 +359,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                 "persist" : True,
             }
             self.data = BincodeTransform(**eql_transform_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="\nUpdated dataset after performing customized equal width bin-code transformation :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
         # Hnadling bincode transformation for Variable-Width
         custom_var_bincode_col = self.data_transformation_params.get("custom_var_bincode_col", None)
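A recurring change in this release: every transform that persists its result ("persist": True) now registers the materialized table with the internal GarbageCollector so it can be dropped later. A self-contained sketch of that register-then-cleanup bookkeeping (the real GarbageCollector._add_to_garbagecollector records Vantage table names for session cleanup; this mock only illustrates the idea):

# Illustrative register-and-cleanup pattern; not the teradataml implementation.
class MiniGarbageCollector:
    _tables = []

    @classmethod
    def add(cls, table_name):
        cls._tables.append(table_name)  # remember what to drop later

    @classmethod
    def cleanup(cls):
        for table_name in cls._tables:
            print(f"DROP TABLE {table_name}")  # real code would execute SQL
        cls._tables.clear()

MiniGarbageCollector.add("eql_bincode_result_tbl")  # hypothetical table name
MiniGarbageCollector.cleanup()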
@@ -346,8 +380,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                 "persist" : True
             }
             self.data = BincodeTransform(**var_transform_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="\nUpdated dataset after performing customized variable width bin-code transformation :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _custom_string_column_transformation(self):
         """
@@ -362,7 +399,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             for target_col,transform_val in custom_string_manipulation_param.items():
                 self.data = self._str_method_mapping(target_col, transform_val)
             self._display_msg(msg="\nUpdated dataset after performing customized string manipulation :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _categorical_encoding_transformation(self):
         """
@@ -384,10 +422,13 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             }
             # Performing one hot encoding transformation
             self.data = OneHotEncodingTransform(**transform_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
             # Dropping old columns after encoding
             self.data = self.data.drop(one_hot_encoding_drop_list, axis=1)
             self._display_msg(msg="\nUpdated dataset after performing categorical encoding :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _custom_categorical_encoding_transformation(self):
         """
@@ -412,6 +453,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             }
             # Performing ordinal encoding transformation
             self.data = OrdinalEncodingTransform(**transform_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
         # Extracting parameters for target encoding
         custom_target_encoding_ind = self.data_transformation_params.get("custom_target_encoding_ind", False)
         custom_target_encoding_fit_obj = self.data_transformation_params.get("custom_target_encoding_fit_obj", None)
@@ -426,10 +469,13 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                 "accumulate" : accumulate_columns,
                 "persist" : True
             }
-            # Performing ordinal encoding transformation
+            # Performing target encoding transformation
            self.data = TargetEncodingTransform(**transform_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="\nUpdated dataset after performing customized categorical encoding :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
         # Handling rest with default categorical encoding transformation
         self._categorical_encoding_transformation()
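All of these encoders share the call shape seen throughout the diff: build a keyword dict, unpack it into the transform, and read the transformed DataFrame off .result. A self-contained mock of that shape (the class below is a stand-in, not a teradataml API):

# Stand-in illustrating the **params -> .result call shape used above.
class MockTransform:
    def __init__(self, data, object, accumulate, persist):
        # A real teradataml transform would push SQL to Vantage here;
        # the mock just records its inputs.
        self.result = {"data": data, "fit": object,
                       "kept_columns": accumulate, "persisted": persist}

transform_params = {
    "data": "input_table",        # teradataml DataFrame in the real flow
    "object": "fit_object",       # fit object from data_transformation_params
    "accumulate": ["id", "target"],
    "persist": True,
}
data = MockTransform(**transform_params).result
print(data["kept_columns"])  # ['id', 'target']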
@@ -472,8 +518,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             }
             # Peforming transformation on target columns
             self.data = Transform(**transform_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="\nUpdated dataset after performing customized mathematical transformation :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _custom_non_linear_transformation(self):
         """
@@ -495,8 +544,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             }
             # Performing transformation
             self.data = NonLinearCombineTransform(**transform_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="\nUpdated dataset after performing customized non-linear transformation :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _custom_anti_select_column_transformation(self):
         """
@@ -516,7 +568,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
             # Performing transformation for given user input
             self.data = Antiselect(**fit_params).result
             self._display_msg(msg="\nUpdated dataset after performing customized anti-selection :",
-                              data=self.data)
+                              data=self.data,
+                              progress_bar=self.progress_bar)
 
     def _handle_generated_features_transformation(self):
         """
@@ -541,6 +594,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                 "accumulate" : accumulate_columns,
                 "persist" : True}
             self.data = RoundColumns(**fit_params).result
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
 
     def _handle_target_column_transformation(self):
         """
@@ -565,7 +620,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         }
         # Performing ordinal encoding transformation
         self.data = OrdinalEncodingTransform(**transform_params).result
-
+        # Adding transformed data containing table to garbage collector
+        GarbageCollector._add_to_garbagecollector(self.data._table_name)
         # Converting target column to integer datatype
         params = {
             "data" : self.data,
@@ -575,7 +631,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         }
         self.data = ConvertTo(**params).result
         self._display_msg(msg="\nUpdated dataset after performing target column transformation :",
-                          data=self.data)
+                          data=self.data,
+                          progress_bar=self.progress_bar)
 
     def _extract_and_display_features(self, feature_type, feature_list):
         """
@@ -605,7 +662,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
 
         # Displaying feature dataframe
         self._display_msg(msg=f"\nUpdated dataset after performing {feature_type} feature selection:",
-                          data=feature_df)
+                          data=feature_df,
+                          progress_bar=self.progress_bar)
 
         # Returning feature dataframe
         return feature_df
@@ -631,7 +689,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                                   accumulate=accumulate_cols).result
         # Displaying scaled dataset
         self._display_msg(msg="\nUpdated dataset after performing scaling on Lasso selected features :",
-                          data=lasso_df)
+                          data=lasso_df,
+                          progress_bar=self.progress_bar)
 
         # Uploading lasso dataset to table for further use
         table_name = UtilFuncs._generate_temp_table_name(prefix="lasso_new_test",
@@ -667,7 +726,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                                 accumulate=accumulate_cols).result
         # Displaying scaled dataset
         self._display_msg(msg="\nUpdated dataset after performing scaling on RFE selected features :",
-                          data=rfe_df)
+                          data=rfe_df,
+                          progress_bar=self.progress_bar)
 
         # Uploading rfe dataset to table for further use
         table_name = UtilFuncs._generate_temp_table_name(prefix="rfe_new_test",
@@ -691,7 +751,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                                        accumulate=accumulate_cols).result
         # Displaying scaled dataset
         self._display_msg(msg="\nUpdated dataset after performing scaling for PCA feature selection :",
-                          data=pca_scaled_df)
+                          data=pca_scaled_df,
+                          progress_bar=self.progress_bar)
 
         # Convert to pandas dataframe for applying pca
         pca_scaled_pd = pca_scaled_df.to_pandas()
@@ -718,7 +779,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         pca_df[self.data_target_column] = pca_scaled_pd[self.data_target_column].reset_index(drop=True)
         # Displaying pca dataframe
         self._display_msg(msg="\nUpdated dataset after performing PCA feature selection :",
-                          data=pca_df)
+                          data=pca_df.head(10),
+                          progress_bar=self.progress_bar)
 
         # Uploading pca dataset to table for further use
         table_name = UtilFuncs._generate_temp_table_name(prefix="pca_new_test",
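The PCA step runs client-side on a pandas conversion of the scaled data (to_pandas() above), and the display now shows only the first 10 rows (pca_df.head(10)). A compact scikit-learn sketch of that scale-then-PCA flow under assumed column names (not the package's exact code):

import pandas as pd
from sklearn.decomposition import PCA

# Hypothetical scaled feature frame standing in for pca_scaled_pd above.
pca_scaled_pd = pd.DataFrame({"f1": [0.1, 0.4, 0.9, 0.3],
                              "f2": [0.7, 0.2, 0.5, 0.8],
                              "target": [0, 1, 1, 0]})

# Fit PCA on the feature columns only, keeping the target aside.
features = pca_scaled_pd.drop(columns=["target"])
pca = PCA(n_components=2)
components = pca.fit_transform(features)

# Reattach the target column, as the hunk above does via reset_index.
pca_df = pd.DataFrame(components, columns=["pca_1", "pca_2"])
pca_df["target"] = pca_scaled_pd["target"].reset_index(drop=True)
print(pca_df.head(10))  # the diff now displays only the first 10 rows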