teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (263)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +183 -0
  4. teradataml/__init__.py +6 -3
  5. teradataml/_version.py +2 -2
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +275 -40
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +1 -0
  11. teradataml/analytics/json_parser/utils.py +17 -21
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +10 -2
  15. teradataml/analytics/table_operator/__init__.py +3 -2
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +62 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1553 -319
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +276 -319
  22. teradataml/automl/data_transformation.py +163 -81
  23. teradataml/automl/feature_engineering.py +402 -239
  24. teradataml/automl/feature_exploration.py +9 -2
  25. teradataml/automl/model_evaluation.py +48 -51
  26. teradataml/automl/model_training.py +291 -189
  27. teradataml/catalog/byom.py +8 -8
  28. teradataml/catalog/model_cataloging_utils.py +1 -1
  29. teradataml/clients/auth_client.py +133 -0
  30. teradataml/clients/pkce_client.py +1 -1
  31. teradataml/common/aed_utils.py +3 -2
  32. teradataml/common/constants.py +48 -6
  33. teradataml/common/deprecations.py +13 -7
  34. teradataml/common/garbagecollector.py +156 -120
  35. teradataml/common/messagecodes.py +6 -1
  36. teradataml/common/messages.py +3 -1
  37. teradataml/common/sqlbundle.py +1 -1
  38. teradataml/common/utils.py +103 -11
  39. teradataml/common/wrapper_utils.py +1 -1
  40. teradataml/context/context.py +121 -31
  41. teradataml/data/advertising.csv +201 -0
  42. teradataml/data/bank_marketing.csv +11163 -0
  43. teradataml/data/bike_sharing.csv +732 -0
  44. teradataml/data/boston2cols.csv +721 -0
  45. teradataml/data/breast_cancer.csv +570 -0
  46. teradataml/data/complaints_test_tokenized.csv +353 -0
  47. teradataml/data/complaints_tokens_model.csv +348 -0
  48. teradataml/data/covid_confirm_sd.csv +83 -0
  49. teradataml/data/customer_segmentation_test.csv +2628 -0
  50. teradataml/data/customer_segmentation_train.csv +8069 -0
  51. teradataml/data/dataframe_example.json +10 -0
  52. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  53. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  54. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  55. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  56. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  57. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  58. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  59. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  60. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  61. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  62. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  63. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  64. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  65. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  66. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  67. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  68. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  69. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  70. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  71. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  72. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  73. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  74. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  75. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  76. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  77. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  78. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  79. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  80. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  81. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  82. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  83. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  84. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  85. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  86. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  87. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  88. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  89. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  90. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  91. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  92. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  93. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  94. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  95. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  96. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  97. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  98. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  99. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  100. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  101. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  102. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  103. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  104. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  105. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  106. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  107. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  108. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  109. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  110. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  111. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  112. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  113. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  114. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  115. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  116. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  117. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  118. teradataml/data/dwt2d_dataTable.csv +65 -0
  119. teradataml/data/dwt_dataTable.csv +8 -0
  120. teradataml/data/dwt_filterTable.csv +3 -0
  121. teradataml/data/finance_data4.csv +13 -0
  122. teradataml/data/glm_example.json +28 -1
  123. teradataml/data/grocery_transaction.csv +19 -0
  124. teradataml/data/housing_train_segment.csv +201 -0
  125. teradataml/data/idwt2d_dataTable.csv +5 -0
  126. teradataml/data/idwt_dataTable.csv +8 -0
  127. teradataml/data/idwt_filterTable.csv +3 -0
  128. teradataml/data/insect2Cols.csv +61 -0
  129. teradataml/data/interval_data.csv +5 -0
  130. teradataml/data/jsons/paired_functions.json +14 -0
  131. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  132. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  133. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  134. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  135. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  136. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  137. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  138. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  139. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  140. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  141. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  142. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  143. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  144. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  145. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  146. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  147. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  148. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  149. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  150. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  151. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  152. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  153. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  154. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  155. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  156. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  157. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  158. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  159. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  160. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  161. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  162. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  163. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  164. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  165. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  166. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  167. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  168. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  169. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  170. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  171. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  172. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  173. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  174. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  175. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  176. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  177. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  178. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  179. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  180. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  181. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  182. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  183. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  184. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  185. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  186. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  187. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  188. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  189. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  190. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  191. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  192. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  193. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  194. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  195. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  196. teradataml/data/kmeans_example.json +5 -0
  197. teradataml/data/kmeans_table.csv +10 -0
  198. teradataml/data/load_example_data.py +8 -2
  199. teradataml/data/naivebayestextclassifier_example.json +1 -1
  200. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  201. teradataml/data/onehot_encoder_train.csv +4 -0
  202. teradataml/data/openml_example.json +29 -0
  203. teradataml/data/peppers.png +0 -0
  204. teradataml/data/real_values.csv +14 -0
  205. teradataml/data/sax_example.json +8 -0
  206. teradataml/data/scale_attributes.csv +3 -0
  207. teradataml/data/scale_example.json +52 -1
  208. teradataml/data/scale_input_part_sparse.csv +31 -0
  209. teradataml/data/scale_input_partitioned.csv +16 -0
  210. teradataml/data/scale_input_sparse.csv +11 -0
  211. teradataml/data/scale_parameters.csv +3 -0
  212. teradataml/data/scripts/deploy_script.py +21 -2
  213. teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
  214. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
  215. teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
  216. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  217. teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
  218. teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
  219. teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
  220. teradataml/data/star_pivot.csv +8 -0
  221. teradataml/data/templates/open_source_ml.json +2 -1
  222. teradataml/data/teradataml_example.json +97 -1
  223. teradataml/data/timestamp_data.csv +4 -0
  224. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  225. teradataml/data/uaf_example.json +55 -1
  226. teradataml/data/unpivot_example.json +15 -0
  227. teradataml/data/url_data.csv +9 -0
  228. teradataml/data/windowdfft.csv +16 -0
  229. teradataml/data/ztest_example.json +16 -0
  230. teradataml/dataframe/copy_to.py +9 -4
  231. teradataml/dataframe/data_transfer.py +125 -64
  232. teradataml/dataframe/dataframe.py +575 -57
  233. teradataml/dataframe/dataframe_utils.py +47 -9
  234. teradataml/dataframe/fastload.py +273 -90
  235. teradataml/dataframe/functions.py +339 -0
  236. teradataml/dataframe/row.py +160 -0
  237. teradataml/dataframe/setop.py +2 -2
  238. teradataml/dataframe/sql.py +740 -18
  239. teradataml/dataframe/window.py +1 -1
  240. teradataml/dbutils/dbutils.py +324 -18
  241. teradataml/geospatial/geodataframe.py +1 -1
  242. teradataml/geospatial/geodataframecolumn.py +1 -1
  243. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  244. teradataml/lib/aed_0_1.dll +0 -0
  245. teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
  246. teradataml/options/__init__.py +16 -5
  247. teradataml/options/configure.py +39 -6
  248. teradataml/options/display.py +2 -2
  249. teradataml/plot/axis.py +4 -4
  250. teradataml/scriptmgmt/UserEnv.py +26 -19
  251. teradataml/scriptmgmt/lls_utils.py +120 -16
  252. teradataml/table_operators/Script.py +4 -5
  253. teradataml/table_operators/TableOperator.py +160 -26
  254. teradataml/table_operators/table_operator_util.py +88 -41
  255. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  256. teradataml/telemetry_utils/__init__.py +0 -0
  257. teradataml/telemetry_utils/queryband.py +52 -0
  258. teradataml/utils/validators.py +41 -3
  259. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
  260. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
  261. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  262. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  263. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -31,6 +31,8 @@ from teradataml import ScaleTransform
31
31
  from teradataml import SimpleImputeTransform
32
32
  from teradataml import TargetEncodingTransform
33
33
  from teradataml import Transform, UtilFuncs, TeradataConstants
34
+ from teradataml.common.garbagecollector import GarbageCollector
35
+ from teradataml.hyperparameter_tuner.utils import _ProgressBar
34
36
 
35
37
  # AutoML Internal libraries
36
38
  from teradataml.automl.feature_exploration import _FeatureExplore
@@ -58,12 +60,12 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
58
60
  Types: teradataml Dataframe
59
61
 
60
62
  data_transformation_params:
61
- Required Arugment.
63
+ Required Argument.
62
64
  Specifies the parameters for performing data transformation.
63
65
  Types: dict
64
66
 
65
67
  auto:
66
- Optional Arugment.
68
+ Optional Argument.
67
69
  Specifies whether to run AutoML in custom mode or auto mode.
68
70
  When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
69
71
  Default Value: True
@@ -80,7 +82,7 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
80
82
  Types: int
81
83
 
82
84
  target_column_ind:
83
- Optional Arugment.
85
+ Optional Argument.
84
86
  Specifies whether target column is present in given dataset.
85
87
  Default Value: False
86
88
  Types: bool
@@ -91,6 +93,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
91
93
  self.verbose = verbose
92
94
  self.target_column_ind = target_column_ind
93
95
  self.table_name_mapping = table_name_mapping
96
+ self.data_node_id = data._nodeid
97
+ self.table_name_mapping[self.data_node_id] = {}
94
98
 
95
99
  def data_transformation(self):
96
100
  """
@@ -118,6 +122,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
118
122
  # Extracting target column details and type whether it is classification or not
119
123
  self.data_target_column = self.data_transformation_params.get("data_target_column")
120
124
  self.classification_type = self.data_transformation_params.get("classification_type", False)
125
+
126
+ # Setting number of jobs for progress bar based on mode of execution
127
+ jobs = 10 if self.auto else 15
128
+ self.progress_bar = _ProgressBar(jobs=jobs, verbose=2, prefix='Transformation Running:')
129
+
121
130
  # Performing transformation carried out in feature engineering phase
122
131
  self.feature_engineering_transformation()
123
132
  # Performing transformation carried out in data preparation phase
@@ -133,27 +142,52 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
133
142
  on test data using parameters from data_transformation_params.
134
143
  """
135
144
  self._display_msg(msg="Performing transformation carried out in feature engineering phase ...",
136
- show_data=True)
145
+ show_data=True,
146
+ progress_bar=self.progress_bar)
147
+
137
148
  # Performing default transformation for both auto and custom mode
138
149
  self._preprocess_transformation()
150
+ self.progress_bar.update()
151
+
139
152
  self._futile_column_handling_transformation()
153
+ self.progress_bar.update()
154
+
140
155
  # Handling target column transformation
141
156
  if self.target_column_ind and self.classification_type:
142
157
  self._handle_target_column_transformation()
158
+ self.progress_bar.update()
159
+
143
160
  self._date_column_handling_transformation()
161
+ self.progress_bar.update()
144
162
 
145
163
  # Performing transformation according to run mode
146
164
  if self.auto:
147
165
  self._missing_value_handling_transformation()
166
+ self.progress_bar.update()
167
+
148
168
  self._categorical_encoding_transformation()
169
+ self.progress_bar.update()
149
170
  else:
150
171
  self._custom_missing_value_handling_transformation()
172
+ self.progress_bar.update()
173
+
151
174
  self._custom_bincode_column_transformation()
175
+ self.progress_bar.update()
176
+
152
177
  self._custom_string_column_transformation()
178
+ self.progress_bar.update()
179
+
153
180
  self._custom_categorical_encoding_transformation()
181
+ self.progress_bar.update()
182
+
154
183
  self._custom_mathematical_transformation()
184
+ self.progress_bar.update()
185
+
155
186
  self._custom_non_linear_transformation()
187
+ self.progress_bar.update()
188
+
156
189
  self._custom_anti_select_column_transformation()
190
+ self.progress_bar.update()
157
191
 
158
192
  def data_preparation_transformation(self):
159
193
  """
@@ -162,15 +196,23 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
162
196
  on test data using parameters from data_transformation_params.
163
197
  """
164
198
  self._display_msg(msg="Performing transformation carried out in data preparation phase ...",
165
- show_data=True)
199
+ show_data=True,
200
+ progress_bar=self.progress_bar)
201
+
166
202
  # Handling features transformed from feature engineering phase
167
203
  self._handle_generated_features_transformation()
204
+ self.progress_bar.update()
168
205
 
169
206
  # Performing transformation including feature selection using lasso, rfe and pca
170
207
  # followed by scaling
171
208
  self._feature_selection_lasso_transformation()
209
+ self.progress_bar.update()
210
+
172
211
  self._feature_selection_rfe_transformation()
212
+ self.progress_bar.update()
213
+
173
214
  self._feature_selection_pca_transformation()
215
+ self.progress_bar.update()
174
216
 
175
217
  def _preprocess_transformation(self):
176
218
  """
@@ -182,7 +224,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
182
224
  if columns_to_be_removed:
183
225
  self.data = self.data.drop(columns_to_be_removed, axis=1)
184
226
  self._display_msg(msg="\nUpdated dataset after dropping irrelevent columns :",
185
- data=self.data)
227
+ data=self.data,
228
+ progress_bar=self.progress_bar)
186
229
 
187
230
  # Adding id column
188
231
  self.data = FillRowId(data=self.data, row_id_column='id').result
@@ -197,7 +240,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
197
240
  if futile_cols:
198
241
  self.data = self.data.drop(futile_cols, axis=1)
199
242
  self._display_msg(msg="\nUpdated dataset after dropping futile columns :",
200
- data=self.data)
243
+ data=self.data,
244
+ progress_bar=self.progress_bar)
201
245
 
202
246
  def _date_column_handling_transformation(self):
203
247
  """
@@ -205,47 +249,32 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
205
249
  Function performs transformation on date columns and generates new columns.
206
250
  """
207
251
  # Extracting date columns
208
- date_columns = self.data_transformation_params.get("date_columns",None)
209
- if date_columns:
252
+ self.date_column_list = self.data_transformation_params.get("date_columns",None)
253
+ if self.date_column_list:
210
254
  # Dropping rows with null values in date columns
211
- self.data = self.data.dropna(subset=date_columns)
255
+ self.data = self.data.dropna(subset=self.date_column_list)
212
256
  # Extracting unique date columns for dropping
213
257
  drop_unique_date_columns = self.data_transformation_params.get("drop_unique_date_columns",None)
214
258
  if drop_unique_date_columns:
215
259
  self.data = self.data.drop(drop_unique_date_columns, axis=1)
260
+ # Updated date column list after dropping irrelevant date columns
261
+ self.date_column_list = [item for item in self.date_column_list if item not in drop_unique_date_columns]
216
262
 
217
- # Extracting date components parameters for new columns generation
218
- extract_date_comp_param = self.data_transformation_params.get("extract_date_comp_param",None)
219
- extract_date_comp_col = self.data_transformation_params.get("extract_date_comp_col", None)
220
- if extract_date_comp_param:
221
- self.data=self.data.assign(**extract_date_comp_param)
222
- self.data = self.data.drop(extract_date_comp_col, axis=1)
223
-
224
- # Extracting irrelevant date component columns for dropping
225
- drop_extract_date_columns = self.data_transformation_params.get("drop_extract_date_columns", None)
226
- if drop_extract_date_columns:
227
- self.data = self.data.drop(drop_extract_date_columns, axis=1)
228
-
229
- # Extracting date component fit objects for bincode transformation
230
- day_component_fit_object = self.data_transformation_params.get("day_component_fit_object", None)
231
- month_component_fit_object = self.data_transformation_params.get("month_component_fit_object", None)
232
- year_diff_component_fit_object = self.data_transformation_params.get("year_diff_component_fit_object", None)
233
-
234
- # Performing bincode transformation on day, month and year components
235
- for fit_object in [day_component_fit_object, month_component_fit_object, year_diff_component_fit_object]:
236
- if fit_object:
237
- for col, bin_code_fit in fit_object.items():
238
- accumulate_columns = self._extract_list(self.data.columns, [col])
239
- transform_params = {
240
- "data": self.data,
241
- "object": bin_code_fit,
242
- "accumulate": accumulate_columns,
243
- "persist": True
244
- }
245
- self.data = BincodeTransform(**transform_params).result
246
-
247
- self._display_msg(msg="\nUpdated dataset after transforming date columns :",
248
- data=self.data)
263
+ if len(self.date_column_list) != 0:
264
+ # Extracting date components parameters for new columns generation
265
+ new_columns=self._fetch_date_component()
266
+
267
+ # Extracting irrelevant date component columns for dropping
268
+ drop_extract_date_columns = self.data_transformation_params.get("drop_extract_date_columns", None)
269
+ if drop_extract_date_columns:
270
+ self.data = self.data.drop(drop_extract_date_columns, axis=1)
271
+ new_columns = [item for item in new_columns if item not in drop_extract_date_columns]
272
+
273
+ self._display_msg(msg='Updated list of newly generated features from existing date features :',
274
+ col_lst=new_columns)
275
+ self._display_msg(msg="\nUpdated dataset after transforming date columns :",
276
+ data=self.data,
277
+ progress_bar=self.progress_bar)
249
278
 
250
279
  def _missing_value_handling_transformation(self):
251
280
  """
@@ -257,7 +286,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
257
286
  if drop_cols:
258
287
  self.data = self.data.drop(drop_cols, axis=1)
259
288
  self._display_msg(msg="\nUpdated dataset after dropping missing value containing columns : ",
260
- data=self.data)
289
+ data=self.data,
290
+ progress_bar=self.progress_bar)
261
291
 
262
292
  # Extracting imputation columns and fit object for missing value imputation
263
293
  imputation_cols = self.data_transformation_params.get("imputation_columns", None)
@@ -265,20 +295,22 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
265
295
  sm_fit_obj = self.data_transformation_params.get("imputation_fit_object")
266
296
  # imputing column using fit object
267
297
  self.data = SimpleImputeTransform(data=self.data,
268
- object=sm_fit_obj,
269
- volatile=True).result
298
+ object=sm_fit_obj).result
270
299
  self._display_msg(msg="\nUpdated dataset after imputing missing value containing columns :",
271
- data=self.data)
300
+ data=self.data,
301
+ progress_bar=self.progress_bar)
272
302
 
273
303
  # Handling rest null, its temporary solution. It subjects to change based on input.
274
304
  dropped_data = self.data.dropna()
275
305
  dropped_count = self.data.shape[0] - dropped_data.shape[0]
276
306
  if dropped_count > 0:
277
- self.data = dropped_data
278
307
  self._display_msg(msg="\nFound additional {} rows that contain missing values :".format(dropped_count),
279
- data=self.data)
308
+ data=self.data,
309
+ progress_bar=self.progress_bar)
310
+ self.data = dropped_data
280
311
  self._display_msg(msg="\nUpdated dataset after dropping additional missing value containing rows :",
281
- data=self.data)
312
+ data=self.data,
313
+ progress_bar=self.progress_bar)
282
314
 
283
315
  def _custom_missing_value_handling_transformation(self):
284
316
  """
@@ -291,7 +323,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
291
323
  if drop_col_list:
292
324
  self.data = self.data.drop(drop_col_list, axis=1)
293
325
  self._display_msg(msg="\nUpdated dataset after dropping customized missing value containing columns :",
294
- data=self.data)
326
+ data=self.data,
327
+ progress_bar=self.progress_bar)
295
328
 
296
329
  # Extracting custom imputation columns and fit object for missing value imputation
297
330
  custom_imp_ind = self.data_transformation_params.get("custom_imputation_ind", False)
@@ -299,10 +332,10 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
299
332
  sm_fit_obj = self.data_transformation_params.get("custom_imputation_fit_object")
300
333
  # imputing column using fit object
301
334
  self.data = SimpleImputeTransform(data=self.data,
302
- object=sm_fit_obj,
303
- volatile=True).result
335
+ object=sm_fit_obj).result
304
336
  self._display_msg(msg="\nUpdated dataset after imputing customized missing value containing columns :",
305
- data=self.data)
337
+ data=self.data,
338
+ progress_bar=self.progress_bar)
306
339
  # Handling rest with default missing value handling
307
340
  self._missing_value_handling_transformation()
308
341
 
@@ -325,11 +358,15 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
325
358
  "data" : self.data,
326
359
  "object" : custom_eql_bincode_fit_object,
327
360
  "accumulate" : accumulate_columns,
328
- "persist" : True,
361
+ "persist" : True,
362
+ "display_table_name" : False
329
363
  }
330
364
  self.data = BincodeTransform(**eql_transform_params).result
365
+ # Adding transformed data containing table to garbage collector
366
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
331
367
  self._display_msg(msg="\nUpdated dataset after performing customized equal width bin-code transformation :",
332
- data=self.data)
368
+ data=self.data,
369
+ progress_bar=self.progress_bar)
333
370
 
334
371
  # Hnadling bincode transformation for Variable-Width
335
372
  custom_var_bincode_col = self.data_transformation_params.get("custom_var_bincode_col", None)
@@ -343,11 +380,15 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
343
380
  "object" : custom_var_bincode_fit_object,
344
381
  "object_order_column" : "TD_MinValue_BINFIT",
345
382
  "accumulate" : accumulate_columns,
346
- "persist" : True
383
+ "persist" : True,
384
+ "display_table_name" : False
347
385
  }
348
386
  self.data = BincodeTransform(**var_transform_params).result
387
+ # Adding transformed data containing table to garbage collector
388
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
349
389
  self._display_msg(msg="\nUpdated dataset after performing customized variable width bin-code transformation :",
350
- data=self.data)
390
+ data=self.data,
391
+ progress_bar=self.progress_bar)
351
392
 
352
393
  def _custom_string_column_transformation(self):
353
394
  """
@@ -362,7 +403,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
362
403
  for target_col,transform_val in custom_string_manipulation_param.items():
363
404
  self.data = self._str_method_mapping(target_col, transform_val)
364
405
  self._display_msg(msg="\nUpdated dataset after performing customized string manipulation :",
365
- data=self.data)
406
+ data=self.data,
407
+ progress_bar=self.progress_bar)
366
408
 
367
409
  def _categorical_encoding_transformation(self):
368
410
  """
@@ -380,14 +422,18 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
380
422
  "data" : self.data,
381
423
  "object" : fit_obj,
382
424
  "is_input_dense" : True,
383
- "persist" : True
425
+ "persist" : True,
426
+ "display_table_name" : False
384
427
  }
385
428
  # Performing one hot encoding transformation
386
429
  self.data = OneHotEncodingTransform(**transform_params).result
430
+ # Adding transformed data containing table to garbage collector
431
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
387
432
  # Dropping old columns after encoding
388
433
  self.data = self.data.drop(one_hot_encoding_drop_list, axis=1)
389
434
  self._display_msg(msg="\nUpdated dataset after performing categorical encoding :",
390
- data=self.data)
435
+ data=self.data,
436
+ progress_bar=self.progress_bar)
391
437
 
392
438
  def _custom_categorical_encoding_transformation(self):
393
439
  """
@@ -408,10 +454,13 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
408
454
  "data" : self.data,
409
455
  "object" : custom_ord_encoding_fit_obj,
410
456
  "accumulate" : accumulate_columns,
411
- "persist" : True
457
+ "persist" : True,
458
+ "display_table_name" : False
412
459
  }
413
460
  # Performing ordinal encoding transformation
414
461
  self.data = OrdinalEncodingTransform(**transform_params).result
462
+ # Adding transformed data containing table to garbage collector
463
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
415
464
  # Extracting parameters for target encoding
416
465
  custom_target_encoding_ind = self.data_transformation_params.get("custom_target_encoding_ind", False)
417
466
  custom_target_encoding_fit_obj = self.data_transformation_params.get("custom_target_encoding_fit_obj", None)
@@ -424,12 +473,16 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
424
473
  "data" : self.data,
425
474
  "object" : tar_fit_obj,
426
475
  "accumulate" : accumulate_columns,
427
- "persist" : True
476
+ "persist" : True,
477
+ "display_table_name" : False
428
478
  }
429
- # Performing ordinal encoding transformation
479
+ # Performing target encoding transformation
430
480
  self.data = TargetEncodingTransform(**transform_params).result
481
+ # Adding transformed data containing table to garbage collector
482
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
431
483
  self._display_msg(msg="\nUpdated dataset after performing customized categorical encoding :",
432
- data=self.data)
484
+ data=self.data,
485
+ progress_bar=self.progress_bar)
433
486
 
434
487
  # Handling rest with default categorical encoding transformation
435
488
  self._categorical_encoding_transformation()
@@ -468,12 +521,16 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
468
521
  "data" : self.data,
469
522
  "object" : custom_numerical_transformation_fit_object,
470
523
  "id_columns" : custom_numerical_transformation_id_columns,
471
- "persist" :True
524
+ "persist" :True,
525
+ "display_table_name" : False
472
526
  }
473
527
  # Peforming transformation on target columns
474
528
  self.data = Transform(**transform_params).result
529
+ # Adding transformed data containing table to garbage collector
530
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
475
531
  self._display_msg(msg="\nUpdated dataset after performing customized mathematical transformation :",
476
- data=self.data)
532
+ data=self.data,
533
+ progress_bar=self.progress_bar)
477
534
 
478
535
  def _custom_non_linear_transformation(self):
479
536
  """
@@ -491,12 +548,16 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
491
548
  "data" : self.data,
492
549
  "object" : fit_obj,
493
550
  "accumulate" : self.data.columns,
494
- "persist" : True
551
+ "persist" : True,
552
+ "display_table_name" : False
495
553
  }
496
554
  # Performing transformation
497
555
  self.data = NonLinearCombineTransform(**transform_params).result
556
+ # Adding transformed data containing table to garbage collector
557
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
498
558
  self._display_msg(msg="\nUpdated dataset after performing customized non-linear transformation :",
499
- data=self.data)
559
+ data=self.data,
560
+ progress_bar=self.progress_bar)
500
561
 
501
562
  def _custom_anti_select_column_transformation(self):
502
563
  """
@@ -516,7 +577,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
516
577
  # Performing transformation for given user input
517
578
  self.data = Antiselect(**fit_params).result
518
579
  self._display_msg(msg="\nUpdated dataset after performing customized anti-selection :",
519
- data=self.data)
580
+ data=self.data,
581
+ progress_bar=self.progress_bar)
520
582
 
521
583
  def _handle_generated_features_transformation(self):
522
584
  """
@@ -539,8 +601,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
539
601
  "target_columns" : round_columns,
540
602
  "precision_digit" : 4,
541
603
  "accumulate" : accumulate_columns,
542
- "persist" : True}
604
+ "persist" : True,
605
+ "display_table_name" : False}
543
606
  self.data = RoundColumns(**fit_params).result
607
+ # Adding transformed data containing table to garbage collector
608
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
544
609
 
545
610
  def _handle_target_column_transformation(self):
546
611
  """
@@ -561,11 +626,13 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
561
626
  "data" : self.data,
562
627
  "object" : target_col_ord_encoding_fit_obj,
563
628
  "accumulate" : accumulate_columns,
564
- "persist" : True
629
+ "persist" : True,
630
+ "display_table_name" : False
565
631
  }
566
632
  # Performing ordinal encoding transformation
567
633
  self.data = OrdinalEncodingTransform(**transform_params).result
568
-
634
+ # Adding transformed data containing table to garbage collector
635
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
569
636
  # Converting target column to integer datatype
570
637
  params = {
571
638
  "data" : self.data,
@@ -575,7 +642,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
575
642
  }
576
643
  self.data = ConvertTo(**params).result
577
644
  self._display_msg(msg="\nUpdated dataset after performing target column transformation :",
578
- data=self.data)
645
+ data=self.data,
646
+ progress_bar=self.progress_bar)
579
647
 
580
648
  def _extract_and_display_features(self, feature_type, feature_list):
581
649
  """
@@ -605,7 +673,8 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
605
673
 
606
674
  # Displaying feature dataframe
607
675
  self._display_msg(msg=f"\nUpdated dataset after performing {feature_type} feature selection:",
608
- data=feature_df)
676
+ data=feature_df,
677
+ progress_bar=self.progress_bar)
609
678
 
610
679
  # Returning feature dataframe
611
680
  return feature_df
@@ -631,12 +700,14 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
631
700
  accumulate=accumulate_cols).result
632
701
  # Displaying scaled dataset
633
702
  self._display_msg(msg="\nUpdated dataset after performing scaling on Lasso selected features :",
634
- data=lasso_df)
703
+ data=lasso_df,
704
+ progress_bar=self.progress_bar)
635
705
 
636
706
  # Uploading lasso dataset to table for further use
637
707
  table_name = UtilFuncs._generate_temp_table_name(prefix="lasso_new_test",
638
708
  table_type = TeradataConstants.TERADATA_TABLE)
639
- self.table_name_mapping["lasso_new_test"] = table_name
709
+ # Storing table name mapping for lasso dataset
710
+ self.table_name_mapping[self.data_node_id]["lasso_new_test"] = table_name
640
711
  copy_to_sql(df = lasso_df, table_name= table_name, if_exists="replace")
641
712
 
642
713
  def _feature_selection_rfe_transformation(self):
@@ -667,12 +738,14 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
667
738
  accumulate=accumulate_cols).result
668
739
  # Displaying scaled dataset
669
740
  self._display_msg(msg="\nUpdated dataset after performing scaling on RFE selected features :",
670
- data=rfe_df)
741
+ data=rfe_df,
742
+ progress_bar=self.progress_bar)
671
743
 
672
744
  # Uploading rfe dataset to table for further use
673
745
  table_name = UtilFuncs._generate_temp_table_name(prefix="rfe_new_test",
674
746
  table_type = TeradataConstants.TERADATA_TABLE)
675
- self.table_name_mapping["rfe_new_test"] = table_name
747
+ # Storing table name mapping for rfe dataset
748
+ self.table_name_mapping[self.data_node_id]["rfe_new_test"] = table_name
676
749
  copy_to_sql(df = rfe_df, table_name= table_name, if_exists="replace")
677
750
 
678
751
  def _feature_selection_pca_transformation(self):
@@ -691,18 +764,25 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
691
764
  accumulate=accumulate_cols).result
692
765
  # Displaying scaled dataset
693
766
  self._display_msg(msg="\nUpdated dataset after performing scaling for PCA feature selection :",
694
- data=pca_scaled_df)
767
+ data=pca_scaled_df,
768
+ progress_bar=self.progress_bar)
695
769
 
696
770
  # Convert to pandas dataframe for applying pca
697
771
  pca_scaled_pd = pca_scaled_df.to_pandas()
698
772
  # Extracting pca fit instance for applying pca
699
773
  pca_fit_instance = self.data_transformation_params.get("pca_fit_instance", None)
774
+ # Extracting columns for applying pca
775
+ pca_fit_columns = self.data_transformation_params.get("pca_fit_columns", None)
700
776
 
701
777
  # drop id column and target column if present
702
778
  drop_col = ['id']
703
779
  if self.target_column_ind:
704
780
  drop_col.append(self.data_target_column)
705
781
  pca_df = pca_scaled_pd.drop(columns=drop_col, axis=1)
782
+
783
+ # Rearranging columns to match the order used during PCA fitting to
784
+ # avoid issues during PCA transformation.
785
+ pca_df = pca_df[pca_fit_columns]
706
786
 
707
787
  # Applying pca on scaled dataset
708
788
  pca_df = pca_fit_instance.transform(pca_df)
@@ -718,10 +798,12 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
718
798
  pca_df[self.data_target_column] = pca_scaled_pd[self.data_target_column].reset_index(drop=True)
719
799
  # Displaying pca dataframe
720
800
  self._display_msg(msg="\nUpdated dataset after performing PCA feature selection :",
721
- data=pca_df)
801
+ data=pca_df.head(10),
802
+ progress_bar=self.progress_bar)
722
803
 
723
804
  # Uploading pca dataset to table for further use
724
805
  table_name = UtilFuncs._generate_temp_table_name(prefix="pca_new_test",
725
806
  table_type = TeradataConstants.TERADATA_TABLE)
726
- self.table_name_mapping["pca_new_test"] = table_name
807
+ # Storing table name mapping for pca dataset
808
+ self.table_name_mapping[self.data_node_id]["pca_new_test"] = table_name
727
809
  copy_to_sql(df = pca_df, table_name=table_name, if_exists="replace")