teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (200)
  1. teradataml/LICENSE.pdf +0 -0
  2. teradataml/README.md +112 -0
  3. teradataml/__init__.py +6 -3
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/__init__.py +3 -2
  6. teradataml/analytics/analytic_function_executor.py +224 -16
  7. teradataml/analytics/analytic_query_generator.py +92 -0
  8. teradataml/analytics/byom/__init__.py +3 -2
  9. teradataml/analytics/json_parser/metadata.py +1 -0
  10. teradataml/analytics/json_parser/utils.py +6 -4
  11. teradataml/analytics/meta_class.py +40 -1
  12. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  13. teradataml/analytics/sqle/__init__.py +10 -2
  14. teradataml/analytics/table_operator/__init__.py +3 -2
  15. teradataml/analytics/uaf/__init__.py +21 -2
  16. teradataml/analytics/utils.py +62 -1
  17. teradataml/analytics/valib.py +1 -1
  18. teradataml/automl/__init__.py +1502 -323
  19. teradataml/automl/custom_json_utils.py +139 -61
  20. teradataml/automl/data_preparation.py +245 -306
  21. teradataml/automl/data_transformation.py +32 -12
  22. teradataml/automl/feature_engineering.py +313 -82
  23. teradataml/automl/model_evaluation.py +44 -35
  24. teradataml/automl/model_training.py +109 -146
  25. teradataml/catalog/byom.py +8 -8
  26. teradataml/clients/pkce_client.py +1 -1
  27. teradataml/common/constants.py +37 -0
  28. teradataml/common/deprecations.py +13 -7
  29. teradataml/common/garbagecollector.py +151 -120
  30. teradataml/common/messagecodes.py +4 -1
  31. teradataml/common/messages.py +2 -1
  32. teradataml/common/sqlbundle.py +1 -1
  33. teradataml/common/utils.py +97 -11
  34. teradataml/common/wrapper_utils.py +1 -1
  35. teradataml/context/context.py +72 -2
  36. teradataml/data/complaints_test_tokenized.csv +353 -0
  37. teradataml/data/complaints_tokens_model.csv +348 -0
  38. teradataml/data/covid_confirm_sd.csv +83 -0
  39. teradataml/data/dataframe_example.json +10 -0
  40. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  41. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  42. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  43. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  44. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  45. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  46. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  47. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  48. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  49. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  51. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  52. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  53. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  54. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  55. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  57. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  58. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  59. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  60. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  61. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  62. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  63. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  64. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  65. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  67. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  68. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  69. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  70. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  71. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  72. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  74. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  75. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  76. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  77. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  78. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  79. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  80. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  81. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  82. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  83. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  84. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  85. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  86. teradataml/data/dwt2d_dataTable.csv +65 -0
  87. teradataml/data/dwt_dataTable.csv +8 -0
  88. teradataml/data/dwt_filterTable.csv +3 -0
  89. teradataml/data/finance_data4.csv +13 -0
  90. teradataml/data/grocery_transaction.csv +19 -0
  91. teradataml/data/idwt2d_dataTable.csv +5 -0
  92. teradataml/data/idwt_dataTable.csv +8 -0
  93. teradataml/data/idwt_filterTable.csv +3 -0
  94. teradataml/data/interval_data.csv +5 -0
  95. teradataml/data/jsons/paired_functions.json +14 -0
  96. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  97. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  98. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  99. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  100. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  101. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  102. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  103. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  104. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  105. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  106. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  107. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  108. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  109. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  110. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  111. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  112. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  113. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  114. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  115. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  116. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  117. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  118. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  119. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  120. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  121. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  122. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  123. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  124. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  125. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  126. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  127. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  128. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  129. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  130. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  131. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  132. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  133. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  134. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  135. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  136. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  137. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  138. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  139. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  140. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  141. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  142. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  143. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  144. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  145. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  146. teradataml/data/load_example_data.py +8 -2
  147. teradataml/data/naivebayestextclassifier_example.json +1 -1
  148. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  149. teradataml/data/peppers.png +0 -0
  150. teradataml/data/real_values.csv +14 -0
  151. teradataml/data/sax_example.json +8 -0
  152. teradataml/data/scripts/deploy_script.py +1 -1
  153. teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
  154. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
  155. teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
  156. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  157. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  158. teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
  159. teradataml/data/star_pivot.csv +8 -0
  160. teradataml/data/templates/open_source_ml.json +2 -1
  161. teradataml/data/teradataml_example.json +20 -1
  162. teradataml/data/timestamp_data.csv +4 -0
  163. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  164. teradataml/data/uaf_example.json +55 -1
  165. teradataml/data/unpivot_example.json +15 -0
  166. teradataml/data/url_data.csv +9 -0
  167. teradataml/data/windowdfft.csv +16 -0
  168. teradataml/dataframe/copy_to.py +1 -1
  169. teradataml/dataframe/data_transfer.py +5 -3
  170. teradataml/dataframe/dataframe.py +474 -41
  171. teradataml/dataframe/fastload.py +3 -3
  172. teradataml/dataframe/functions.py +339 -0
  173. teradataml/dataframe/row.py +160 -0
  174. teradataml/dataframe/setop.py +2 -2
  175. teradataml/dataframe/sql.py +658 -20
  176. teradataml/dataframe/window.py +1 -1
  177. teradataml/dbutils/dbutils.py +322 -16
  178. teradataml/geospatial/geodataframe.py +1 -1
  179. teradataml/geospatial/geodataframecolumn.py +1 -1
  180. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  181. teradataml/lib/aed_0_1.dll +0 -0
  182. teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
  183. teradataml/options/__init__.py +3 -1
  184. teradataml/options/configure.py +14 -2
  185. teradataml/options/display.py +2 -2
  186. teradataml/plot/axis.py +4 -4
  187. teradataml/scriptmgmt/UserEnv.py +10 -6
  188. teradataml/scriptmgmt/lls_utils.py +3 -2
  189. teradataml/table_operators/Script.py +2 -2
  190. teradataml/table_operators/TableOperator.py +106 -20
  191. teradataml/table_operators/table_operator_util.py +88 -41
  192. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  193. teradataml/telemetry_utils/__init__.py +0 -0
  194. teradataml/telemetry_utils/queryband.py +52 -0
  195. teradataml/utils/validators.py +1 -1
  196. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
  197. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
  198. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  199. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  200. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
teradataml/automl/feature_engineering.py

@@ -50,7 +50,8 @@ class _FeatureEngineering:
                  model_list,
                  verbose = 0,
                  task_type = "Regression",
-                 custom_data = None):
+                 custom_data = None,
+                 **kwargs):
         """
         DESCRIPTION:
             Function initializes the data, target column and columns datatypes
@@ -94,6 +95,28 @@ class _FeatureEngineering:
                 Optional Argument.
                 Specifies json object containing user customized input.
                 Types: json object
+
+            **kwargs:
+                Specifies the additional arguments for feature engineering. Below
+                are the additional arguments:
+                volatile:
+                    Optional Argument.
+                    Specifies whether to put the interim results of the
+                    functions in a volatile table or not. When set to
+                    True, results are stored in a volatile table,
+                    otherwise not.
+                    Default Value: False
+                    Types: bool
+
+                persist:
+                    Optional Argument.
+                    Specifies whether to persist the interim results of the
+                    functions in a table or not. When set to True,
+                    results are persisted in a table; otherwise,
+                    results are garbage collected at the end of the
+                    session.
+                    Default Value: False
+                    Types: bool
         """
         # Instance variables
         self.data = data
@@ -108,6 +131,8 @@ class _FeatureEngineering:
         self.data_transform_dict = {}
         self.one_hot_obj_count = 0
         self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
+        self.volatile = kwargs.get('volatile', False)
+        self.persist = kwargs.get('persist', False)

     # Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
     def feature_engineering(self,
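
Both flags are read with kwargs.get, so they stay optional and default to False. A minimal sketch of how a caller inside AutoML might now pass them through the new **kwargs (the constructor arguments ahead of model_list are assumed from the visible signature fragment; values are illustrative, not from the diff):

    # Hypothetical invocation; only custom_data and **kwargs are confirmed by this diff.
    fe = _FeatureEngineering(data,                       # input teradataml DataFrame (assumed)
                             "target_col",               # target column name (assumed)
                             model_list=["glm", "xgboost"],
                             verbose=0,
                             task_type="Classification",
                             custom_data=None,
                             volatile=True)              # lands in kwargs; persist defaults to False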
@@ -133,7 +158,7 @@ class _FeatureEngineering:
                 second element represents list of columns which are not participating in outlier tranformation.
         """
         # Assigning number of base jobs for progress bar.
-        base_jobs = 14 if auto else 18
+        base_jobs = 13 if auto else 17

         # Updating model list based on distinct value of target column for classification type
         if self.is_classification_type():
@@ -183,9 +208,12 @@ class _FeatureEngineering:
         self._remove_duplicate_rows()
         self.progress_bar.update()

+        self._anti_select_columns()
+        self.progress_bar.update()
+
         self._remove_futile_columns()
         self.progress_bar.update()
-
+
         self._handle_date_columns()
         self.progress_bar.update()

@@ -206,10 +234,7 @@ class _FeatureEngineering:

         self._non_linear_transformation()
         self.progress_bar.update()
-
-        self._anti_select_columns()
-        self.progress_bar.update()
-
+
         return self.data, self.excluded_cols, self.target_label, self.data_transform_dict

     def _extract_list(self,
@@ -324,12 +349,16 @@ class _FeatureEngineering:
         if len(categorical_columns) != 0:

             obj = CategoricalSummary(data=self.data,
-                                     target_columns=categorical_columns)
+                                     target_columns=categorical_columns,
+                                     volatile=self.volatile,
+                                     persist=self.persist)

             gfc_out = GetFutileColumns(data=self.data,
                                        object=obj,
                                        category_summary_column="ColumnName",
-                                       threshold_value =0.7)
+                                       threshold_value =0.7,
+                                       volatile=self.volatile,
+                                       persist=self.persist)

             # Extracting Futile columns
             f_cols = [row[0] for row in gfc_out.result.itertuples()]
@@ -402,10 +431,22 @@ class _FeatureEngineering:
             "accumulate" : accumulate_columns,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not self.volatile and not self.persist:
+            convertto_params["display_table_name"] = False
+
+        # Setting persist to False if volatile is True
+        if self.volatile:
+            convertto_params["persist"] = False
+            convertto_params["volatile"] = True
+
         # returning dataset after performing string manipulation
         self.data = ConvertTo(**convertto_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+        # IF volatile is False and persist is False
+        if not self.volatile and not self.persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
         return new_date_components

     def _handle_date_columns_helper(self):
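
This three-step adjustment repeats before almost every fit/transform call in the rest of the diff: hide the auto-generated table name when neither flag is set, and let volatile win over persist, since a volatile table only lives for the session anyway. Condensed into one helper for readability (the function name is hypothetical and does not appear in the diff):

    def _apply_volatile_persist(params, volatile, persist):
        # Default path: interim table, name hidden, cleaned up by GarbageCollector.
        if not volatile and not persist:
            params["display_table_name"] = False
        # volatile overrides persist: a session-scoped volatile table cannot persist.
        if volatile:
            params["volatile"] = True
            params["persist"] = False
        return params

With this logic, persist=True keeps the result table after the session, volatile=True routes interim results to a session-scoped volatile table, and the default (both False) preserves the old behavior of registering the interim table with GarbageCollector.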
@@ -526,7 +567,9 @@ class _FeatureEngineering:
         self.data = self.data.dropna(subset=[self.target_column])

         obj = ColumnSummary(data=self.data,
-                            target_columns=self.data.columns)
+                            target_columns=self.data.columns,
+                            volatile=self.volatile,
+                            persist=self.persist)

         cols_miss_val={}
         # Iterating over each row in the column summary result
@@ -697,12 +740,16 @@ class _FeatureEngineering:

         fit_obj = SimpleImputeFit(data=self.data,
                                   stats_columns=col_stat,
-                                  stats=stat)
+                                  stats=stat,
+                                  volatile=self.volatile,
+                                  persist=self.persist)

         # Storing fit object for imputation in data transform dictionary
         self.data_transform_dict['imputation_fit_object'] = fit_obj.output
         sm = SimpleImputeTransform(data=self.data,
-                                   object=fit_obj)
+                                   object=fit_obj,
+                                   volatile=self.volatile,
+                                   persist=self.persist)

         self.data = sm.result
         self._display_msg(msg="Sample of dataset after Imputation:",
@@ -735,6 +782,8 @@ class _FeatureEngineering:
         drop_col_ind = missing_handling_param.get("DroppingColumnIndicator", False)
         drop_row_ind = missing_handling_param.get("DroppingRowIndicator", False)
         impute_ind = missing_handling_param.get("ImputeMissingIndicator", False)
+        volatile = missing_handling_param.pop("volatile", False)
+        persist = missing_handling_param.pop("persist", False)
         # Checking for user input if all methods indicator are false or not
         if not any([drop_col_ind, drop_row_ind, impute_ind]):
             self._display_msg(inline_msg="No method information provided for performing customized missing value handling. \
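
Note that .pop() mutates missing_handling_param in place; later hunks guard the same pattern with .copy() before popping, because the parameter dict comes straight out of self.custom_data and would otherwise lose its volatile/persist keys for any later pass over the same JSON. A small illustration (key values are placeholders; "StringTrim" is one of the operations visible later in this diff):

    custom = {"StringManipulationParam": {"col1": {"StringOperation": "StringTrim"},
                                          "volatile": True}}
    params = custom["StringManipulationParam"]           # same dict object as the stored JSON
    params.pop("volatile", False)                        # also removes the key from custom
    params = custom["StringManipulationParam"].copy()    # .copy() keeps the stored JSON intact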
@@ -791,7 +840,9 @@ class _FeatureEngineering:
             "stats_columns" : stat_list,
             "stats" : stat_method,
             "literals_columns" : literal_list,
-            "literals" : literal_value
+            "literals" : literal_value,
+            "volatile" : volatile,
+            "persist" : persist
         }
         # Fitting on dataset
         fit_obj = SimpleImputeFit(**fit_param)
@@ -804,10 +855,18 @@ class _FeatureEngineering:
             "object" : fit_obj.output,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            transform_param["display_table_name"] = False
+
+        if volatile:
+            transform_param["volatile"] = True
+            transform_param["persist"] = False
         # Updating dataset with transform result
         self.data = SimpleImputeTransform(**transform_param).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
         self._display_msg(msg="Updated dataset sample after performing customized missing value imputation:",
                           data=self.data,
                           progress_bar=self.progress_bar)
@@ -848,6 +907,8 @@ class _FeatureEngineering:
         equal_width_bin_columns = []
         var_width_bin_list = []
         var_width_bin_columns = []
+        volatile = extracted_col.pop("volatile", False)
+        persist = extracted_col.pop("persist", False)

         # Checking for column present in dataset or not
         _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "BincodeParam", self.data, "df")
@@ -881,7 +942,9 @@ class _FeatureEngineering:
             "data" : self.data,
             "target_columns": equal_width_bin_columns,
             "method_type" : "Equal-Width",
-            "nbins" : bins
+            "nbins" : bins,
+            "volatile" : volatile,
+            "persist" : persist
         }
         eql_bin_code_fit = BincodeFit(**fit_params)
         # Storing fit object and column list for Equal-Width binning in data transform dictionary
@@ -894,11 +957,19 @@ class _FeatureEngineering:
             "data" : self.data,
             "object" : eql_bin_code_fit.output,
             "accumulate" : accumulate_columns,
-            "persist" : True,
+            "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            eql_transform_params["display_table_name"] = False
+
+        if volatile:
+            eql_transform_params["volatile"] = True
+            eql_transform_params["persist"] = False
         self.data = BincodeTransform(**eql_transform_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
         self._display_msg(msg="\nUpdated dataset sample after performing Equal-Width binning :-",
                           data=self.data,
                           progress_bar=self.progress_bar)
@@ -923,7 +994,9 @@ class _FeatureEngineering:
             "maxvalue_column" : "MaxValue",
             "label_column" : "Label",
             "method_type" : "Variable-Width",
-            "label_prefix" : "label_prefix"
+            "label_prefix" : "label_prefix",
+            "volatile" : volatile,
+            "persist" : persist
         }
         var_bin_code_fit = BincodeFit(**fit_params)
         # Storing fit object and column list for Variable-Width binning in data transform dictionary
@@ -935,11 +1008,19 @@ class _FeatureEngineering:
             "object" : var_bin_code_fit.output,
             "object_order_column" : "TD_MinValue_BINFIT",
             "accumulate" : accumulate_columns,
-            "persist" : True
+            "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            var_transform_params["display_table_name"] = False
+
+        if volatile:
+            var_transform_params["volatile"] = True
+            var_transform_params["persist"] = False
         self.data = BincodeTransform(**var_transform_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
         self._display_msg(msg="Updated dataset sample after performing Variable-Width binning:",
                           data=self.data,
                           progress_bar=self.progress_bar)
@@ -963,11 +1044,13 @@ class _FeatureEngineering:
         # Storing custom string manipulation indicator in data transform dictionary
         self.data_transform_dict['custom_string_manipulation_ind'] = True
         # Fetching list required for performing operation.
-        extracted_col = self.custom_data.get("StringManipulationParam", None)
+        extracted_col = self.custom_data.get("StringManipulationParam", None).copy()
         if not extracted_col:
             self._display_msg(inline_msg="No information provided for performing string manipulation.",
                               progress_bar=self.progress_bar)
         else:
+            volatile = extracted_col.pop("volatile", False)
+            persist = extracted_col.pop("persist", False)
             # Checking for column present in dataset or not
             _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "StringManipulationParam", self.data, "df")
@@ -980,8 +1063,9 @@ class _FeatureEngineering:
                               data=self.data,
                               progress_bar=self.progress_bar)
         else:
-            self._display_msg(inline_msg="Skipping customized string manipulation.")
-
+            self._display_msg(inline_msg="Skipping customized string manipulation.",
+                              progress_bar=self.progress_bar)
+
     def _str_method_mapping(self,
                             target_col,
                             transform_val):
@@ -1010,7 +1094,11 @@ class _FeatureEngineering:

         # Fetching required parameters from json object
         string_operation = transform_val["StringOperation"]
-
+
+        # Setting volatile and persist parameters for performing string manipulation
+        volatile, persist = self._set_generic_parameters(func_indicator="StringManipulationIndicator",
+                                                         param_name="StringManipulationParam")
+
         # Storing general parameters for performing string transformation
         fit_params = {
             "data" : self.data,
@@ -1020,6 +1108,14 @@ class _FeatureEngineering:
             "inplace" : True,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            fit_params["display_table_name"] = False
+
+        if volatile:
+            fit_params["volatile"] = True
+            fit_params["persist"] = False
+
         # Adding additional parameters based on string operation type
         if string_operation in ["StringCon", "StringTrim"]:
             string_argument = transform_val["String"]
@@ -1040,13 +1136,14 @@ class _FeatureEngineering:

         # returning dataset after performing string manipulation
         transform_output = StrApply(**fit_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(transform_output._table_name)
         return transform_output

     def _one_hot_encoding(self,
-                          one_hot_columns,
-                          unique_counts):
+                          one_hot_columns,
+                          unique_counts):
         """
         DESCRIPTION:
             Function performs the one hot encoding to categorcial columns/features in the dataset.
@@ -1060,12 +1157,16 @@ class _FeatureEngineering:
             unique_counts:
                 Required Argument.
                 Specifies the unique counts in the categorical columns.
-                Types: int or list of integer (int)
-
+                Types: int or list of integer (int)
         """
         # TD function will add extra column_other in onehotEncoding, so
         # initailizing this list to remove those extra columns
         drop_lst = [ele + "_other" for ele in one_hot_columns]
+
+        # Setting volatile and persist parameters for performing encoding
+        volatile, persist = self._set_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                         param_name="CategoricalEncodingParam")
+
         # Adding fit parameters for performing encoding
         fit_params = {
             "data" : self.data,
@@ -1073,7 +1174,9 @@ class _FeatureEngineering:
             "is_input_dense" : True,
             "target_column" : one_hot_columns,
             "category_counts" : unique_counts,
-            "other_column" : "other"
+            "other_column" : "other",
+            "volatile" : volatile,
+            "persist" : persist
         }
         # Performing one hot encoding fit on target columns
         fit_obj = OneHotEncodingFit(**fit_params)
@@ -1089,10 +1192,21 @@ class _FeatureEngineering:
             "is_input_dense" : True,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            transform_params["display_table_name"] = False
+
+        # Setting persist to False if volatile is True
+        if volatile:
+            transform_params["volatile"] = True
+            transform_params["persist"] = False
+
         # Performing one hot encoding transformation
         transform_output = OneHotEncodingTransform(**transform_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(transform_output._table_name)
         self.data = transform_output.drop(drop_lst, axis=1)

     def _ordinal_encoding(self,
@@ -1107,10 +1221,16 @@ class _FeatureEngineering:
                 Specifies the categorical columns for which ordinal encoding will be performed.
                 Types: str or list of strings (str)
         """
+        # Setting volatile and persist parameters for performing encoding
+        volatile, persist = self._set_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                         param_name="CategoricalEncodingParam")
+
         # Adding fit parameters for performing encoding
         fit_params = {
             "data" : self.data,
-            "target_column" : ordinal_columns
+            "target_column" : ordinal_columns,
+            "volatile" : volatile,
+            "persist" : persist
         }
         # Performing ordinal encoding fit on target columns
         ord_fit_obj = OrdinalEncodingFit(**fit_params)
@@ -1130,17 +1250,27 @@ class _FeatureEngineering:
             "accumulate" : accumulate_columns,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            transform_params["display_table_name"] = False
+
+        # Setting persist to False if volatile is True
+        if volatile:
+            transform_params["volatile"] = True
+            transform_params["persist"] = False
         # Performing ordinal encoding transformation
         self.data = OrdinalEncodingTransform(**transform_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)

         if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
             self.target_label = ord_fit_obj


     def _target_encoding(self,
-                         target_encoding_list):
+                         target_encoding_list):
         """
         DESCRIPTION:
             Function performs the target encoding to categorcial columns/features in the dataset.
@@ -1165,6 +1295,11 @@ class _FeatureEngineering:
         # Storing indicator and fit object for target encoding in data transform dictionary
         self.data_transform_dict["custom_target_encoding_ind"] = True
         self.data_transform_dict["custom_target_encoding_fit_obj"] = {}
+
+        # Setting volatile and persist parameters for performing encoding
+        volatile, persist = self._set_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                         param_name="CategoricalEncodingParam")
+
         # Fetching required argument for performing target encoding
         for col,transform_val in target_encoding_list.items():
             encoder_method = transform_val["encoder_method"]
@@ -1175,7 +1310,9 @@ class _FeatureEngineering:
                 "category_data" : category_data,
                 "encoder_method" : encoder_method,
                 "target_columns" : col,
-                "response_column" : response_column
+                "response_column" : response_column,
+                "volatile" : volatile,
+                "persist" : persist
             }
             if encoder_method == "CBM_DIRICHLET":
                 num_distinct_responses=transform_val["num_distinct_responses"]
@@ -1184,7 +1321,7 @@ class _FeatureEngineering:
             # Performing target encoding fit on target columns
             tar_fit_obj = TargetEncodingFit(**fit_params)
             # Storing each column fit object for target encoding in data transform dictionary
-            self.data_transform_dict["custom_target_encoding_fit_obj"].update({col : tar_fit_obj})
+            self.data_transform_dict["custom_target_encoding_fit_obj"].update({col : tar_fit_obj.result})
             # Extracting accumulate columns
             accumulate_columns = self._extract_list(self.data.columns, [col])
             # Adding transform parameters for performing encoding
@@ -1192,12 +1329,21 @@ class _FeatureEngineering:
                 "data" : self.data,
                 "object" : tar_fit_obj,
                 "accumulate" : accumulate_columns,
-                "persist" : True
+                "persist" : True
             }
+
+            # Disabling display table name if persist is True by default
+            if not volatile and not persist:
+                transform_params["display_table_name"] = False
+
+            if volatile:
+                transform_params["volatile"] = True
+                transform_params["persist"] = False
             # Performing ordinal encoding transformation
             self.data = TargetEncodingTransform(**transform_params).result
-            # Adding transformed data containing table to garbage collector
-            GarbageCollector._add_to_garbagecollector(self.data._table_name)
+            if not volatile and not persist:
+                # Adding transformed data containing table to garbage collector
+                GarbageCollector._add_to_garbagecollector(self.data._table_name)

     def _encoding_categorical_columns(self):
         """
@@ -1262,8 +1408,10 @@ class _FeatureEngineering:
         # Storing custom categorical encoding indicator in data transform dictionary
         self.data_transform_dict["custom_categorical_encoding_ind"] = True
         # Fetching user input list for performing
-        encoding_list = self.custom_data.get("CategoricalEncodingParam", None)
+        encoding_list = self.custom_data.get("CategoricalEncodingParam", None).copy()
         if encoding_list:
+            volatile = encoding_list.pop("volatile", False)
+            persist = encoding_list.pop("persist", False)
            onehot_encode_ind = encoding_list.get("OneHotEncodingIndicator", False)
             ordinal_encode_ind = encoding_list.get("OrdinalEncodingIndicator", False)
             target_encode_ind = encoding_list.get("TargetEncodingIndicator", False)
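
So from this release the per-phase parameter objects in the custom JSON can carry volatile/persist next to their existing keys, and both are stripped out before indicator lookups or column validation. An illustrative CategoricalEncodingParam fragment (only keys visible in this diff; per-encoder payloads omitted):

    custom_json["CategoricalEncodingParam"] = {
        "OneHotEncodingIndicator": True,
        "OrdinalEncodingIndicator": False,
        "TargetEncodingIndicator": False,
        # ... per-encoder settings go here ...
        "volatile": False,   # popped before the indicator lookups
        "persist": True
    }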
@@ -1340,11 +1488,25 @@ class _FeatureEngineering:
         """
         DESCRIPTION:
             Function to perform different numerical transformations using NumApply on numerical features based on user input.
-
+
+        PARAMETERS:
+            target_col:
+                Required Argument.
+                Specifies the numerical column for which transformation will be performed.
+                Types: str
+
+            transform_val:
+                Required Argument.
+                Specifies different parameter require for applying numerical transformation.
+                Types: dict
         """
         # Fetching columns for accumulation
         accumulate_columns = self._extract_list(self.data.columns, [target_col])
         apply_method = transform_val["apply_method"]
+
+        # Setting volatile and persist parameters for performing transformation
+        volatile, persist = self._set_generic_parameters(func_indicator="MathameticalTransformationIndicator",
+                                                         param_name="MathameticalTransformationParam")
         # Adding fit parameters for performing transformation
         fit_params={
             "data": self.data,
@@ -1354,17 +1516,25 @@ class _FeatureEngineering:
             "persist" :True,
             "accumulate" : accumulate_columns
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            fit_params["display_table_name"] = False
+
+        if volatile:
+            fit_params["volatile"] = True
+            fit_params["persist"] = False
         # Adding addition details for fit parameters in case of SIGMOID transformation
         if apply_method == "sigmoid":
             sigmoid_style=transform_val["sigmoid_style"]
             fit_params = {**fit_params, "sigmoid_style" : sigmoid_style}
         # Performing transformation on target columns
         transform_output = NumApply(**fit_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(transform_output._table_name)
         return transform_output

-    def _numerical_transformation(self, target_columns, num_transform_data):
+    def _numerical_transformation(self, target_columns, num_transform_data, volatile, persist):
         """
         DESCRIPTION:
             Function to perform different numerical transformations using Fit and Transform on numerical features based on user input.
@@ -1374,7 +1544,9 @@ class _FeatureEngineering:
         fit_params={
             "data" : self.data,
             "object" : num_transform_data,
-            "object_order_column" : "TargetColumn"
+            "object_order_column" : "TargetColumn",
+            "volatile" : volatile,
+            "persist" : persist
         }
         # Peforming fit with all arguments.
         num_fit_obj = Fit(**fit_params)
1392
1564
  "id_columns" : id_columns,
1393
1565
  "persist" :True
1394
1566
  }
1567
+ # Disabling display table name if persist is True by default
1568
+ if not volatile and not persist:
1569
+ transform_params["display_table_name"] = False
1570
+
1571
+ if volatile:
1572
+ transform_params["volatile"] = True
1573
+ transform_params["persist"] = False
1395
1574
  # Peforming transformation on target columns
1396
1575
  self.data = Transform(**transform_params).result
1397
- # Adding transformed data containing table to garbage collector
1398
- GarbageCollector._add_to_garbagecollector(self.data._table_name)
1576
+ if not volatile and not persist:
1577
+ # Adding transformed data containing table to garbage collector
1578
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
1399
1579
  self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
1400
1580
  data=self.data,
1401
1581
  progress_bar=self.progress_bar)
@@ -1414,8 +1594,11 @@ class _FeatureEngineering:
         # Checking user input for mathematical transformations
         if mat_transform_input:
             # Extracting list required for mathematical transformations
-            mat_transform_list = self.custom_data.get("MathameticalTransformationParam", None)
+            mat_transform_list = self.custom_data.get("MathameticalTransformationParam", None).copy()
+
             if mat_transform_list:
+                volatile = mat_transform_list.pop("volatile", False)
+                persist = mat_transform_list.pop("persist", False)
                 # Checking for column present in dataset or not
                 _Validators._validate_dataframe_has_argument_columns(list(mat_transform_list.keys()),
                                                                      "MathameticalTransformationParam", self.data, "df")
@@ -1459,7 +1642,7 @@ class _FeatureEngineering:
                 copy_to_sql(df=transform_data, table_name="automl_num_transform_data", temporary=True)
                 num_transform_data = DataFrame.from_table("automl_num_transform_data")
                 # Applying transformation using Fit/Transform functions
-                self._numerical_transformation(target_columns, num_transform_data)
+                self._numerical_transformation(target_columns, num_transform_data, volatile, persist)
                 # Storing custom numerical transformation parameters and column list in data transform dictionary
                 self.data_transform_dict['custom_numerical_transformation_col'] = target_columns
                 self.data_transform_dict['custom_numerical_transformation_params'] = num_transform_data
@@ -1485,6 +1668,8 @@ class _FeatureEngineering:
             nl_transform_list = self.custom_data.get("NonLinearTransformationParam", None)
             # Extracting list required for non-linear transformation
             if nl_transform_list:
+                volatile = nl_transform_list.pop("volatile", False)
+                persist = nl_transform_list.pop("persist", False)
                 total_combination = len(nl_transform_list)
                 # Generating all possible combination names
                 possible_combination = ["Combination_"+str(counter) for counter in range(1,total_combination+1)]
@@ -1511,12 +1696,14 @@ class _FeatureEngineering:
                         "data" : self.data,
                         "target_columns" : target_columns,
                         "formula" : formula,
-                        "result_column" : result_column
+                        "result_column" : result_column,
+                        "volatile" : volatile,
+                        "persist" : persist
                     }
                     # Performing fit on dataset
                     fit_obj = NonLinearCombineFit(**fit_param)
                     # Updating it for each non-linear combination
-                    self.data_transform_dict['custom_non_linear_transformation_fit_object'].update({comb:fit_obj})
+                    self.data_transform_dict['custom_non_linear_transformation_fit_object'].update({comb:fit_obj.result})
                     # Adding transform params for transformation
                     transform_params = {
                         "data" : self.data,
@@ -1524,9 +1711,18 @@ class _FeatureEngineering:
                         "accumulate" : self.data.columns,
                         "persist" : True
                     }
+                    # Disabling display table name if persist is True by default
+                    if not volatile and not persist:
+                        transform_params["display_table_name"] = False
+
+                    if volatile:
+                        transform_params["volatile"] = True
+                        transform_params["persist"] = False
                     self.data = NonLinearCombineTransform(**transform_params).result
-                    # Adding transformed data containing table to garbage collector
-                    GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+                    if not volatile and not persist:
+                        # Adding transformed data containing table to garbage collector
+                        GarbageCollector._add_to_garbagecollector(self.data._table_name)
                 else:
                     self._display_msg(inline_msg="Combinations are not as per expectation.",
                                       progress_bar=self.progress_bar)
@@ -1552,29 +1748,64 @@ class _FeatureEngineering:
         anti_select_input = self.custom_data.get("AntiselectIndicator", False)
         # Checking user input for anti-select columns
         if anti_select_input:
-            # Extracting list required for anti-select columns
-            anti_select_list = self.custom_data.get("AntiselectParam", None)
-            if(anti_select_list):
-                if all(item in self.data.columns for item in anti_select_list):
-                    # Storing custom anti-select columns indicator and column list in data transform dictionary
-                    self.data_transform_dict['custom_anti_select_columns_ind'] = True
-                    self.data_transform_dict['custom_anti_select_columns'] = anti_select_list
-                    fit_params = {
-                        "data" : self.data,
-                        "exclude" : anti_select_list
-                    }
-                    # Performing transformation for given user input
-                    self.data = Antiselect(**fit_params).result
-                    self._display_msg(msg="Updated dataset sample after performing anti-select columns:",
-                                      data=self.data,
-                                      progress_bar=self.progress_bar)
-                else:
-                    self._display_msg(msg="Columns provided in list are not present in dataset:",
-                                      col_lst=anti_select_list,
-                                      progress_bar=self.progress_bar)
+            anti_select_params = self.custom_data.get("AntiselectParam", None)
+            if anti_select_params:
+                # Extracting list required for anti-select columns
+                anti_select_list = anti_select_params.get("excluded_columns", None)
+                volatile = anti_select_params.get("volatile", False)
+                persist = anti_select_params.get("persist", False)
+                if(anti_select_list):
+                    if all(item in self.data.columns for item in anti_select_list):
+                        # Storing custom anti-select columns indicator and column list in data transform dictionary
+                        self.data_transform_dict['custom_anti_select_columns_ind'] = True
+                        self.data_transform_dict['custom_anti_select_columns'] = anti_select_list
+                        fit_params = {
+                            "data" : self.data,
+                            "exclude" : anti_select_list,
+                            "volatile" : volatile,
+                            "persist" : persist
+                        }
+                        # Performing transformation for given user input
+                        self.data = Antiselect(**fit_params).result
+                        self._display_msg(msg="Updated dataset sample after performing anti-select columns:",
+                                          data=self.data,
+                                          progress_bar=self.progress_bar)
+                    else:
+                        self._display_msg(msg="Columns provided in list are not present in dataset:",
+                                          col_lst=anti_select_list,
+                                          progress_bar=self.progress_bar)
             else:
                 self._display_msg(inline_msg="No information provided for performing anti-select columns operation.",
                                   progress_bar=self.progress_bar)
         else:
             self._display_msg(inline_msg="Skipping customized anti-select columns.",
-                              progress_bar=self.progress_bar)
+                              progress_bar=self.progress_bar)
+
+    def _set_generic_parameters(self,
+                                func_indicator=None,
+                                param_name=None):
+        """
+        DESCRIPTION:
+            Function to set generic parameters.
+
+        PARAMETERS:
+            func_indicator:
+                Optional Argument.
+                Specifies the name of function indicator.
+                Types: str
+
+            param_name:
+                Optional Argument.
+                Specifies the name of the param which contains generic parameters.
+                Types: str
+
+        RETURNS:
+            Tuple containing volatile and persist parameters.
+        """
+        volatile = self.volatile
+        persist = self.persist
+        if self.custom_data is not None and self.custom_data.get(func_indicator, False):
+            volatile = self.custom_data[param_name].get("volatile", False)
+            persist = self.custom_data[param_name].get("persist", False)
+
+        return (volatile, persist)
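
_set_generic_parameters gives per-phase JSON settings precedence over the instance-wide kwargs: when a phase's indicator is enabled in custom_data, the phase's own volatile/persist values are used, and their False defaults apply even if the constructor flags were True, so the JSON fully overrides the kwargs. A usage sketch (fe as in the earlier hypothetical example; values illustrative):

    fe.custom_data = {"CategoricalEncodingIndicator": True,
                      "CategoricalEncodingParam": {"volatile": True}}
    volatile, persist = fe._set_generic_parameters(
        func_indicator="CategoricalEncodingIndicator",
        param_name="CategoricalEncodingParam")
    # volatile -> True (from the JSON); persist -> False (JSON default,
    # regardless of any persist kwarg passed to the constructor)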