teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (240)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/automl/feature_engineering.py

@@ -40,6 +40,7 @@ from teradataml.common.garbagecollector import GarbageCollector
 from teradataml.dataframe.sql_functions import case
 from teradataml.hyperparameter_tuner.utils import _ProgressBar
 from teradataml.utils.validators import _Validators
+from teradataml.common.utils import UtilFuncs


 class _FeatureEngineering:
@@ -50,7 +51,8 @@ class _FeatureEngineering:
                  model_list,
                  verbose = 0,
                  task_type = "Regression",
-                 custom_data = None):
+                 custom_data = None,
+                 **kwargs):
         """
         DESCRIPTION:
             Function initializes the data, target column and columns datatypes
@@ -94,6 +96,28 @@ class _FeatureEngineering:
                 Optional Argument.
                 Specifies json object containing user customized input.
                 Types: json object
+
+            **kwargs:
+                Specifies the additional arguments for feature engineering. Below
+                are the additional arguments:
+                    volatile:
+                        Optional Argument.
+                        Specifies whether to put the interim results of the
+                        functions in a volatile table or not. When set to
+                        True, results are stored in a volatile table,
+                        otherwise not.
+                        Default Value: False
+                        Types: bool
+
+                    persist:
+                        Optional Argument.
+                        Specifies whether to persist the interim results of the
+                        functions in a table or not. When set to True,
+                        results are persisted in a table; otherwise,
+                        results are garbage collected at the end of the
+                        session.
+                        Default Value: False
+                        Types: bool
         """
         # Instance variables
         self.data = data
@@ -108,6 +132,8 @@ class _FeatureEngineering:
         self.data_transform_dict = {}
         self.one_hot_obj_count = 0
         self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
+        self.volatile = kwargs.get('volatile', False)
+        self.persist = kwargs.get('persist', False)

     # Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
     def feature_engineering(self,
@@ -133,7 +159,7 @@ class _FeatureEngineering:
                second element represents list of columns which are not participating in outlier tranformation.
         """
         # Assigning number of base jobs for progress bar.
-        base_jobs = 14 if auto else 18
+        base_jobs = 13 if auto else 17

         # Updating model list based on distinct value of target column for classification type
         if self.is_classification_type():
@@ -183,9 +209,12 @@ class _FeatureEngineering:
         self._remove_duplicate_rows()
         self.progress_bar.update()

+        self._anti_select_columns()
+        self.progress_bar.update()
+
         self._remove_futile_columns()
         self.progress_bar.update()
-
+
         self._handle_date_columns()
         self.progress_bar.update()

@@ -206,10 +235,7 @@

         self._non_linear_transformation()
         self.progress_bar.update()
-
-        self._anti_select_columns()
-        self.progress_bar.update()
-
+
         return self.data, self.excluded_cols, self.target_label, self.data_transform_dict

     def _extract_list(self,
@@ -248,7 +274,7 @@ class _FeatureEngineering:
                           show_data=True)
         start_time = time.time()
         rows = self.data.shape[0]
-        self.data=self.data.drop_duplicate()
+        self.data=self.data.drop_duplicate(self.data.columns)
         if rows != self.data.shape[0]:
             self._display_msg(msg=f'Updated dataset sample after removing {rows-self.data.shape[0]} duplicate records:',
                               data=self.data,
@@ -324,12 +350,16 @@ class _FeatureEngineering:
         if len(categorical_columns) != 0:

             obj = CategoricalSummary(data=self.data,
-                                     target_columns=categorical_columns)
+                                     target_columns=categorical_columns,
+                                     volatile=self.volatile,
+                                     persist=self.persist)

             gfc_out = GetFutileColumns(data=self.data,
                                        object=obj,
                                        category_summary_column="ColumnName",
-                                       threshold_value =0.7)
+                                       threshold_value =0.7,
+                                       volatile=self.volatile,
+                                       persist=self.persist)

             # Extracting Futile columns
             f_cols = [row[0] for row in gfc_out.result.itertuples()]
@@ -402,10 +432,22 @@ class _FeatureEngineering:
             "accumulate" : accumulate_columns,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not self.volatile and not self.persist:
+            convertto_params["display_table_name"] = False
+
+        # Setting persist to False if volatile is True
+        if self.volatile:
+            convertto_params["persist"] = False
+            convertto_params["volatile"] = True
+
         # returning dataset after performing string manipulation
         self.data = ConvertTo(**convertto_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+        # IF volatile is False and persist is False
+        if not self.volatile and not self.persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
         return new_date_components

     def _handle_date_columns_helper(self):
@@ -524,9 +566,18 @@ class _FeatureEngineering:

         # Removing rows with missing target column value
         self.data = self.data.dropna(subset=[self.target_column])
+
+        params = {
+            "data": self.data,
+            "target_columns": self.data.columns,
+            "persist": True,
+            "display_table_name": False
+        }

-        obj = ColumnSummary(data=self.data,
-                            target_columns=self.data.columns)
+        obj = ColumnSummary(**params)
+
+        # Adding transformed data containing table to garbage collector
+        GarbageCollector._add_to_garbagecollector(obj.result._table_name)

         cols_miss_val={}
         # Iterating over each row in the column summary result
@@ -661,7 +712,7 @@ class _FeatureEngineering:
         for key, val in self.imputation_cols.items():

             col_stat.append(key)
-            if self.data_types[key] in ['float', 'int']:
+            if self.data_types[key] in ['float', 'int', 'decimal.Decimal']:
                 val = skew_data[f'skew_{key}']
                 # Median imputation method, if abs(skewness value) > 1
                 if abs(val) > 1:
@@ -670,7 +721,7 @@ class _FeatureEngineering:
                 else:
                     stat.append('mean')
             # Mode imputation method, if categorical column
-            else:
+            elif self.data_types[key] in ['str']:
                 stat.append('mode')

         self._display_msg(msg="Columns with their imputation method:",
@@ -697,12 +748,16 @@ class _FeatureEngineering:

         fit_obj = SimpleImputeFit(data=self.data,
                                   stats_columns=col_stat,
-                                  stats=stat)
+                                  stats=stat,
+                                  volatile=self.volatile,
+                                  persist=self.persist)

         # Storing fit object for imputation in data transform dictionary
         self.data_transform_dict['imputation_fit_object'] = fit_obj.output
         sm = SimpleImputeTransform(data=self.data,
-                                   object=fit_obj)
+                                   object=fit_obj,
+                                   volatile=self.volatile,
+                                   persist=self.persist)

         self.data = sm.result
         self._display_msg(msg="Sample of dataset after Imputation:",
@@ -735,6 +790,8 @@ class _FeatureEngineering:
         drop_col_ind = missing_handling_param.get("DroppingColumnIndicator", False)
         drop_row_ind = missing_handling_param.get("DroppingRowIndicator", False)
         impute_ind = missing_handling_param.get("ImputeMissingIndicator", False)
+        volatile = missing_handling_param.pop("volatile", False)
+        persist = missing_handling_param.pop("persist", False)
         # Checking for user input if all methods indicator are false or not
         if not any([drop_col_ind, drop_row_ind, impute_ind]):
             self._display_msg(inline_msg="No method information provided for performing customized missing value handling. \
@@ -791,7 +848,9 @@ class _FeatureEngineering:
                 "stats_columns" : stat_list,
                 "stats" : stat_method,
                 "literals_columns" : literal_list,
-                "literals" : literal_value
+                "literals" : literal_value,
+                "volatile" : volatile,
+                "persist" : persist
             }
             # Fitting on dataset
             fit_obj = SimpleImputeFit(**fit_param)
@@ -804,10 +863,18 @@ class _FeatureEngineering:
                 "object" : fit_obj.output,
                 "persist" : True
             }
+            # Disabling display table name if persist is True by default
+            if not volatile and not persist:
+                transform_param["display_table_name"] = False
+
+            if volatile:
+                transform_param["volatile"] = True
+                transform_param["persist"] = False
             # Updating dataset with transform result
             self.data = SimpleImputeTransform(**transform_param).result
-            # Adding transformed data containing table to garbage collector
-            GarbageCollector._add_to_garbagecollector(self.data._table_name)
+            if not volatile and not persist:
+                # Adding transformed data containing table to garbage collector
+                GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="Updated dataset sample after performing customized missing value imputation:",
                               data=self.data,
                               progress_bar=self.progress_bar)
@@ -848,6 +915,8 @@ class _FeatureEngineering:
         equal_width_bin_columns = []
         var_width_bin_list = []
         var_width_bin_columns = []
+        volatile = extracted_col.pop("volatile", False)
+        persist = extracted_col.pop("persist", False)

         # Checking for column present in dataset or not
         _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "BincodeParam", self.data, "df")
@@ -881,7 +950,9 @@ class _FeatureEngineering:
                 "data" : self.data,
                 "target_columns": equal_width_bin_columns,
                 "method_type" : "Equal-Width",
-                "nbins" : bins
+                "nbins" : bins,
+                "volatile" : volatile,
+                "persist" : persist
             }
             eql_bin_code_fit = BincodeFit(**fit_params)
             # Storing fit object and column list for Equal-Width binning in data transform dictionary
@@ -894,11 +965,19 @@ class _FeatureEngineering:
                 "data" : self.data,
                 "object" : eql_bin_code_fit.output,
                 "accumulate" : accumulate_columns,
-                "persist" : True,
+                "persist" : True
             }
+            # Disabling display table name if persist is True by default
+            if not volatile and not persist:
+                eql_transform_params["display_table_name"] = False
+
+            if volatile:
+                eql_transform_params["volatile"] = True
+                eql_transform_params["persist"] = False
             self.data = BincodeTransform(**eql_transform_params).result
-            # Adding transformed data containing table to garbage collector
-            GarbageCollector._add_to_garbagecollector(self.data._table_name)
+            if not volatile and not persist:
+                # Adding transformed data containing table to garbage collector
+                GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="\nUpdated dataset sample after performing Equal-Width binning :-",
                               data=self.data,
                               progress_bar=self.progress_bar)
@@ -923,7 +1002,9 @@ class _FeatureEngineering:
                 "maxvalue_column" : "MaxValue",
                 "label_column" : "Label",
                 "method_type" : "Variable-Width",
-                "label_prefix" : "label_prefix"
+                "label_prefix" : "label_prefix",
+                "volatile" : volatile,
+                "persist" : persist
             }
             var_bin_code_fit = BincodeFit(**fit_params)
             # Storing fit object and column list for Variable-Width binning in data transform dictionary
@@ -935,11 +1016,19 @@ class _FeatureEngineering:
                 "object" : var_bin_code_fit.output,
                 "object_order_column" : "TD_MinValue_BINFIT",
                 "accumulate" : accumulate_columns,
-                "persist" : True
+                "persist" : True
             }
+            # Disabling display table name if persist is True by default
+            if not volatile and not persist:
+                var_transform_params["display_table_name"] = False
+
+            if volatile:
+                var_transform_params["volatile"] = True
+                var_transform_params["persist"] = False
             self.data = BincodeTransform(**var_transform_params).result
-            # Adding transformed data containing table to garbage collector
-            GarbageCollector._add_to_garbagecollector(self.data._table_name)
+            if not volatile and not persist:
+                # Adding transformed data containing table to garbage collector
+                GarbageCollector._add_to_garbagecollector(self.data._table_name)
             self._display_msg(msg="Updated dataset sample after performing Variable-Width binning:",
                               data=self.data,
                               progress_bar=self.progress_bar)
@@ -963,11 +1052,13 @@ class _FeatureEngineering:
         # Storing custom string manipulation indicator in data transform dictionary
         self.data_transform_dict['custom_string_manipulation_ind'] = True
         # Fetching list required for performing operation.
-        extracted_col = self.custom_data.get("StringManipulationParam", None)
+        extracted_col = self.custom_data.get("StringManipulationParam", None).copy()
         if not extracted_col:
             self._display_msg(inline_msg="No information provided for performing string manipulation.",
                               progress_bar=self.progress_bar)
         else:
+            volatile = extracted_col.pop("volatile", False)
+            persist = extracted_col.pop("persist", False)
             # Checking for column present in dataset or not
             _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "StringManipulationParam", self.data, "df")

@@ -980,8 +1071,9 @@ class _FeatureEngineering:
                                   data=self.data,
                                   progress_bar=self.progress_bar)
             else:
-                self._display_msg(inline_msg="Skipping customized string manipulation.")
-
+                self._display_msg(inline_msg="Skipping customized string manipulation.",
+                                  progress_bar=self.progress_bar)
+
     def _str_method_mapping(self,
                             target_col,
                             transform_val):
@@ -1010,7 +1102,11 @@ class _FeatureEngineering:

         # Fetching required parameters from json object
         string_operation = transform_val["StringOperation"]
-
+
+        # Setting volatile and persist parameters for performing string manipulation
+        volatile, persist = self._set_generic_parameters(func_indicator="StringManipulationIndicator",
+                                                         param_name="StringManipulationParam")
+
         # Storing general parameters for performing string transformation
         fit_params = {
             "data" : self.data,
@@ -1020,6 +1116,14 @@ class _FeatureEngineering:
             "inplace" : True,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            fit_params["display_table_name"] = False
+
+        if volatile:
+            fit_params["volatile"] = True
+            fit_params["persist"] = False
+
         # Adding additional parameters based on string operation type
         if string_operation in ["StringCon", "StringTrim"]:
             string_argument = transform_val["String"]
@@ -1040,13 +1144,14 @@ class _FeatureEngineering:

         # returning dataset after performing string manipulation
         transform_output = StrApply(**fit_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(transform_output._table_name)
         return transform_output

     def _one_hot_encoding(self,
-                            one_hot_columns,
-                            unique_counts):
+                          one_hot_columns,
+                          unique_counts):
         """
         DESCRIPTION:
             Function performs the one hot encoding to categorcial columns/features in the dataset.
@@ -1060,12 +1165,16 @@ class _FeatureEngineering:
             unique_counts:
                 Required Argument.
                 Specifies the unique counts in the categorical columns.
-                Types: int or list of integer (int)
-
+                Types: int or list of integer (int)
         """
         # TD function will add extra column_other in onehotEncoding, so
         # initailizing this list to remove those extra columns
         drop_lst = [ele + "_other" for ele in one_hot_columns]
+
+        # Setting volatile and persist parameters for performing encoding
+        volatile, persist = self._set_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                         param_name="CategoricalEncodingParam")
+
         # Adding fit parameters for performing encoding
         fit_params = {
             "data" : self.data,
@@ -1073,7 +1182,9 @@ class _FeatureEngineering:
             "is_input_dense" : True,
             "target_column" : one_hot_columns,
             "category_counts" : unique_counts,
-            "other_column" : "other"
+            "other_column" : "other",
+            "volatile" : volatile,
+            "persist" : persist
         }
         # Performing one hot encoding fit on target columns
         fit_obj = OneHotEncodingFit(**fit_params)
@@ -1089,10 +1200,21 @@ class _FeatureEngineering:
             "is_input_dense" : True,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            transform_params["display_table_name"] = False
+
+        # Setting persist to False if volatile is True
+        if volatile:
+            transform_params["volatile"] = True
+            transform_params["persist"] = False
+
         # Performing one hot encoding transformation
         transform_output = OneHotEncodingTransform(**transform_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(transform_output._table_name)
         self.data = transform_output.drop(drop_lst, axis=1)

     def _ordinal_encoding(self,
@@ -1107,10 +1229,16 @@ class _FeatureEngineering:
             Specifies the categorical columns for which ordinal encoding will be performed.
             Types: str or list of strings (str)
         """
+        # Setting volatile and persist parameters for performing encoding
+        volatile, persist = self._set_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                         param_name="CategoricalEncodingParam")
+
         # Adding fit parameters for performing encoding
         fit_params = {
             "data" : self.data,
-            "target_column" : ordinal_columns
+            "target_column" : ordinal_columns,
+            "volatile" : volatile,
+            "persist" : persist
         }
         # Performing ordinal encoding fit on target columns
         ord_fit_obj = OrdinalEncodingFit(**fit_params)
@@ -1130,17 +1258,27 @@ class _FeatureEngineering:
             "accumulate" : accumulate_columns,
             "persist" : True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            transform_params["display_table_name"] = False
+
+        # Setting persist to False if volatile is True
+        if volatile:
+            transform_params["volatile"] = True
+            transform_params["persist"] = False
         # Performing ordinal encoding transformation
         self.data = OrdinalEncodingTransform(**transform_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)

         if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
             self.target_label = ord_fit_obj


     def _target_encoding(self,
-                            target_encoding_list):
+                         target_encoding_list):
         """
         DESCRIPTION:
             Function performs the target encoding to categorcial columns/features in the dataset.
@@ -1165,6 +1303,11 @@ class _FeatureEngineering:
         # Storing indicator and fit object for target encoding in data transform dictionary
         self.data_transform_dict["custom_target_encoding_ind"] = True
         self.data_transform_dict["custom_target_encoding_fit_obj"] = {}
+
+        # Setting volatile and persist parameters for performing encoding
+        volatile, persist = self._set_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                         param_name="CategoricalEncodingParam")
+
         # Fetching required argument for performing target encoding
         for col,transform_val in target_encoding_list.items():
             encoder_method = transform_val["encoder_method"]
@@ -1175,7 +1318,9 @@ class _FeatureEngineering:
                 "category_data" : category_data,
                 "encoder_method" : encoder_method,
                 "target_columns" : col,
-                "response_column" : response_column
+                "response_column" : response_column,
+                "volatile" : volatile,
+                "persist" : persist
             }
             if encoder_method == "CBM_DIRICHLET":
                 num_distinct_responses=transform_val["num_distinct_responses"]
@@ -1184,7 +1329,7 @@ class _FeatureEngineering:
             # Performing target encoding fit on target columns
             tar_fit_obj = TargetEncodingFit(**fit_params)
             # Storing each column fit object for target encoding in data transform dictionary
-            self.data_transform_dict["custom_target_encoding_fit_obj"].update({col : tar_fit_obj})
+            self.data_transform_dict["custom_target_encoding_fit_obj"].update({col : tar_fit_obj.result})
             # Extracting accumulate columns
             accumulate_columns = self._extract_list(self.data.columns, [col])
             # Adding transform parameters for performing encoding
@@ -1192,12 +1337,21 @@ class _FeatureEngineering:
                 "data" : self.data,
                 "object" : tar_fit_obj,
                 "accumulate" : accumulate_columns,
-                "persist" : True
+                "persist" : True
             }
+
+            # Disabling display table name if persist is True by default
+            if not volatile and not persist:
+                transform_params["display_table_name"] = False
+
+            if volatile:
+                transform_params["volatile"] = True
+                transform_params["persist"] = False
             # Performing ordinal encoding transformation
             self.data = TargetEncodingTransform(**transform_params).result
-            # Adding transformed data containing table to garbage collector
-            GarbageCollector._add_to_garbagecollector(self.data._table_name)
+            if not volatile and not persist:
+                # Adding transformed data containing table to garbage collector
+                GarbageCollector._add_to_garbagecollector(self.data._table_name)

     def _encoding_categorical_columns(self):
         """
@@ -1262,8 +1416,10 @@ class _FeatureEngineering:
         # Storing custom categorical encoding indicator in data transform dictionary
         self.data_transform_dict["custom_categorical_encoding_ind"] = True
         # Fetching user input list for performing
-        encoding_list = self.custom_data.get("CategoricalEncodingParam", None)
+        encoding_list = self.custom_data.get("CategoricalEncodingParam", None).copy()
         if encoding_list:
+            volatile = encoding_list.pop("volatile", False)
+            persist = encoding_list.pop("persist", False)
             onehot_encode_ind = encoding_list.get("OneHotEncodingIndicator", False)
             ordinal_encode_ind = encoding_list.get("OrdinalEncodingIndicator", False)
             target_encode_ind = encoding_list.get("TargetEncodingIndicator", False)
@@ -1340,11 +1496,25 @@ class _FeatureEngineering:
         """
         DESCRIPTION:
             Function to perform different numerical transformations using NumApply on numerical features based on user input.
-
+
+        PARAMETERS:
+            target_col:
+                Required Argument.
+                Specifies the numerical column for which transformation will be performed.
+                Types: str
+
+            transform_val:
+                Required Argument.
+                Specifies different parameter require for applying numerical transformation.
+                Types: dict
         """
         # Fetching columns for accumulation
         accumulate_columns = self._extract_list(self.data.columns, [target_col])
         apply_method = transform_val["apply_method"]
+
+        # Setting volatile and persist parameters for performing transformation
+        volatile, persist = self._set_generic_parameters(func_indicator="MathameticalTransformationIndicator",
+                                                         param_name="MathameticalTransformationParam")
         # Adding fit parameters for performing transformation
         fit_params={
             "data": self.data,
@@ -1354,17 +1524,25 @@ class _FeatureEngineering:
             "persist" :True,
             "accumulate" : accumulate_columns
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            fit_params["display_table_name"] = False
+
+        if volatile:
+            fit_params["volatile"] = True
+            fit_params["persist"] = False
         # Adding addition details for fit parameters in case of SIGMOID transformation
         if apply_method == "sigmoid":
             sigmoid_style=transform_val["sigmoid_style"]
             fit_params = {**fit_params, "sigmoid_style" : sigmoid_style}
         # Performing transformation on target columns
         transform_output = NumApply(**fit_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(transform_output._table_name)
         return transform_output

-    def _numerical_transformation(self, target_columns, num_transform_data):
+    def _numerical_transformation(self, target_columns, num_transform_data, volatile, persist):
         """
         DESCRIPTION:
             Function to perform different numerical transformations using Fit and Transform on numerical features based on user input.
@@ -1374,7 +1552,9 @@ class _FeatureEngineering:
         fit_params={
             "data" : self.data,
             "object" : num_transform_data,
-            "object_order_column" : "TargetColumn"
+            "object_order_column" : "TargetColumn",
+            "volatile" : volatile,
+            "persist" : persist
         }
         # Peforming fit with all arguments.
         num_fit_obj = Fit(**fit_params)
@@ -1392,10 +1572,18 @@ class _FeatureEngineering:
             "id_columns" : id_columns,
             "persist" :True
         }
+        # Disabling display table name if persist is True by default
+        if not volatile and not persist:
+            transform_params["display_table_name"] = False
+
+        if volatile:
+            transform_params["volatile"] = True
+            transform_params["persist"] = False
         # Peforming transformation on target columns
         self.data = Transform(**transform_params).result
-        # Adding transformed data containing table to garbage collector
-        GarbageCollector._add_to_garbagecollector(self.data._table_name)
+        if not volatile and not persist:
+            # Adding transformed data containing table to garbage collector
+            GarbageCollector._add_to_garbagecollector(self.data._table_name)
         self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
                           data=self.data,
                           progress_bar=self.progress_bar)
@@ -1414,8 +1602,11 @@ class _FeatureEngineering:
         # Checking user input for mathematical transformations
         if mat_transform_input:
             # Extracting list required for mathematical transformations
-            mat_transform_list = self.custom_data.get("MathameticalTransformationParam", None)
+            mat_transform_list = self.custom_data.get("MathameticalTransformationParam", None).copy()
+
             if mat_transform_list:
+                volatile = mat_transform_list.pop("volatile", False)
+                persist = mat_transform_list.pop("persist", False)
                 # Checking for column present in dataset or not
                 _Validators._validate_dataframe_has_argument_columns(list(mat_transform_list.keys()),
                                                                      "MathameticalTransformationParam", self.data, "df")
@@ -1459,7 +1650,7 @@ class _FeatureEngineering:
                 copy_to_sql(df=transform_data, table_name="automl_num_transform_data", temporary=True)
                 num_transform_data = DataFrame.from_table("automl_num_transform_data")
                 # Applying transformation using Fit/Transform functions
-                self._numerical_transformation(target_columns, num_transform_data)
+                self._numerical_transformation(target_columns, num_transform_data, volatile, persist)
                 # Storing custom numerical transformation parameters and column list in data transform dictionary
                 self.data_transform_dict['custom_numerical_transformation_col'] = target_columns
                 self.data_transform_dict['custom_numerical_transformation_params'] = num_transform_data
@@ -1485,6 +1676,8 @@ class _FeatureEngineering:
             nl_transform_list = self.custom_data.get("NonLinearTransformationParam", None)
             # Extracting list required for non-linear transformation
             if nl_transform_list:
+                volatile = nl_transform_list.pop("volatile", False)
+                persist = nl_transform_list.pop("persist", False)
                 total_combination = len(nl_transform_list)
                 # Generating all possible combination names
                 possible_combination = ["Combination_"+str(counter) for counter in range(1,total_combination+1)]
@@ -1511,12 +1704,14 @@ class _FeatureEngineering:
                         "data" : self.data,
                         "target_columns" : target_columns,
                         "formula" : formula,
-                        "result_column" : result_column
+                        "result_column" : result_column,
+                        "volatile" : volatile,
+                        "persist" : persist
                     }
                     # Performing fit on dataset
                     fit_obj = NonLinearCombineFit(**fit_param)
                     # Updating it for each non-linear combination
-                    self.data_transform_dict['custom_non_linear_transformation_fit_object'].update({comb:fit_obj})
+                    self.data_transform_dict['custom_non_linear_transformation_fit_object'].update({comb:fit_obj.result})
                     # Adding transform params for transformation
                     transform_params = {
                         "data" : self.data,
@@ -1524,9 +1719,18 @@ class _FeatureEngineering:
                         "accumulate" : self.data.columns,
                         "persist" : True
                     }
+                    # Disabling display table name if persist is True by default
+                    if not volatile and not persist:
+                        transform_params["display_table_name"] = False
+
+                    if volatile:
+                        transform_params["volatile"] = True
+                        transform_params["persist"] = False
                     self.data = NonLinearCombineTransform(**transform_params).result
-                    # Adding transformed data containing table to garbage collector
-                    GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+                    if not volatile and not persist:
+                        # Adding transformed data containing table to garbage collector
+                        GarbageCollector._add_to_garbagecollector(self.data._table_name)
                 else:
                     self._display_msg(inline_msg="Combinations are not as per expectation.",
                                       progress_bar=self.progress_bar)
@@ -1552,29 +1756,64 @@ class _FeatureEngineering:
         anti_select_input = self.custom_data.get("AntiselectIndicator", False)
         # Checking user input for anti-select columns
         if anti_select_input:
-            # Extracting list required for anti-select columns
-            anti_select_list = self.custom_data.get("AntiselectParam", None)
-            if(anti_select_list):
-                if all(item in self.data.columns for item in anti_select_list):
-                    # Storing custom anti-select columns indicator and column list in data transform dictionary
-                    self.data_transform_dict['custom_anti_select_columns_ind'] = True
-                    self.data_transform_dict['custom_anti_select_columns'] = anti_select_list
-                    fit_params = {
-                        "data" : self.data,
-                        "exclude" : anti_select_list
-                    }
-                    # Performing transformation for given user input
-                    self.data = Antiselect(**fit_params).result
-                    self._display_msg(msg="Updated dataset sample after performing anti-select columns:",
-                                      data=self.data,
-                                      progress_bar=self.progress_bar)
-                else:
-                    self._display_msg(msg="Columns provided in list are not present in dataset:",
-                                      col_lst=anti_select_list,
-                                      progress_bar=self.progress_bar)
+            anti_select_params = self.custom_data.get("AntiselectParam", None)
+            if anti_select_params:
+                # Extracting list required for anti-select columns
+                anti_select_list = anti_select_params.get("excluded_columns", None)
+                volatile = anti_select_params.get("volatile", False)
+                persist = anti_select_params.get("persist", False)
+                if(anti_select_list):
+                    if all(item in self.data.columns for item in anti_select_list):
+                        # Storing custom anti-select columns indicator and column list in data transform dictionary
+                        self.data_transform_dict['custom_anti_select_columns_ind'] = True
+                        self.data_transform_dict['custom_anti_select_columns'] = anti_select_list
+                        fit_params = {
+                            "data" : self.data,
+                            "exclude" : anti_select_list,
+                            "volatile" : volatile,
+                            "persist" : persist
+                        }
+                        # Performing transformation for given user input
+                        self.data = Antiselect(**fit_params).result
+                        self._display_msg(msg="Updated dataset sample after performing anti-select columns:",
+                                          data=self.data,
+                                          progress_bar=self.progress_bar)
+                    else:
+                        self._display_msg(msg="Columns provided in list are not present in dataset:",
+                                          col_lst=anti_select_list,
+                                          progress_bar=self.progress_bar)
             else:
                 self._display_msg(inline_msg="No information provided for performing anti-select columns operation.",
                                   progress_bar=self.progress_bar)
         else:
             self._display_msg(inline_msg="Skipping customized anti-select columns.",
-                              progress_bar=self.progress_bar)
+                              progress_bar=self.progress_bar)
+
+    def _set_generic_parameters(self,
+                                func_indicator=None,
+                                param_name=None):
+        """
+        DESCRIPTION:
+            Function to set generic parameters.
+
+        PARAMETERS:
+            func_indicator:
+                Optional Argument.
+                Specifies the name of function indicator.
+                Types: str
+
+            param_name:
+                Optional Argument.
+                Specifies the name of the param which contains generic parameters.
+                Types: str
+
+        RETURNS:
+            Tuple containing volatile and persist parameters.
+        """
+        volatile = self.volatile
+        persist = self.persist
+        if self.custom_data is not None and self.custom_data.get(func_indicator, False):
+            volatile = self.custom_data[param_name].get("volatile", False)
+            persist = self.custom_data[param_name].get("persist", False)
+
+        return (volatile, persist)
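
Taken together, the feature_engineering.py hunks above repeat one pattern around every Fit/Transform call. The following is a condensed sketch of that pattern, not teradataml API: run_transform, transform_func, and gc_register are hypothetical placeholders for the concrete transform classes (BincodeTransform, StrApply, NumApply, ...) and for GarbageCollector._add_to_garbagecollector.

    # Sketch of the volatile/persist handling introduced in this release.
    def run_transform(transform_func, gc_register, base_params,
                      volatile=False, persist=False):
        # The transform always materializes its output as a table.
        params = dict(base_params, persist=True)

        # Default path (neither flag set): hide the interim table's name
        # and register it for cleanup when the session ends.
        if not volatile and not persist:
            params["display_table_name"] = False

        # volatile takes precedence: interim results go to a volatile
        # table and are never persisted.
        if volatile:
            params["volatile"] = True
            params["persist"] = False

        result = transform_func(**params).result
        if not volatile and not persist:
            gc_register(result._table_name)
        return result

So with the defaults (volatile=False, persist=False) interim AutoML tables behave as before, except their names are suppressed; setting either flag opts out of garbage collection, with volatile tables dropped automatically by the database at session end.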