teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (432)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +238 -1
  4. teradataml/__init__.py +13 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/Transformations.py +4 -4
  7. teradataml/analytics/__init__.py +0 -2
  8. teradataml/analytics/analytic_function_executor.py +3 -0
  9. teradataml/analytics/json_parser/utils.py +13 -12
  10. teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
  12. teradataml/analytics/sqle/__init__.py +0 -13
  13. teradataml/analytics/utils.py +1 -0
  14. teradataml/analytics/valib.py +3 -0
  15. teradataml/automl/__init__.py +1628 -0
  16. teradataml/automl/custom_json_utils.py +1270 -0
  17. teradataml/automl/data_preparation.py +993 -0
  18. teradataml/automl/data_transformation.py +727 -0
  19. teradataml/automl/feature_engineering.py +1648 -0
  20. teradataml/automl/feature_exploration.py +547 -0
  21. teradataml/automl/model_evaluation.py +163 -0
  22. teradataml/automl/model_training.py +887 -0
  23. teradataml/catalog/__init__.py +0 -2
  24. teradataml/catalog/byom.py +49 -6
  25. teradataml/catalog/function_argument_mapper.py +0 -2
  26. teradataml/catalog/model_cataloging_utils.py +2 -1021
  27. teradataml/common/aed_utils.py +6 -2
  28. teradataml/common/constants.py +50 -58
  29. teradataml/common/deprecations.py +160 -0
  30. teradataml/common/garbagecollector.py +61 -104
  31. teradataml/common/messagecodes.py +27 -36
  32. teradataml/common/messages.py +11 -15
  33. teradataml/common/utils.py +205 -287
  34. teradataml/common/wrapper_utils.py +1 -110
  35. teradataml/context/context.py +150 -78
  36. teradataml/data/bank_churn.csv +10001 -0
  37. teradataml/data/bmi.csv +501 -0
  38. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
  40. teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
  42. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
  43. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
  44. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
  45. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
  46. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
  47. teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
  48. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
  49. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
  50. teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
  51. teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
  52. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
  53. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
  54. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
  55. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
  56. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
  57. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
  58. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
  59. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
  60. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
  61. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
  62. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
  63. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
  64. teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
  65. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
  66. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
  67. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
  68. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
  69. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
  70. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
  71. teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
  72. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
  73. teradataml/data/fish.csv +160 -0
  74. teradataml/data/glass_types.csv +215 -0
  75. teradataml/data/insurance.csv +1 -1
  76. teradataml/data/iris_data.csv +151 -0
  77. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
  78. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
  79. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
  80. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
  81. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
  82. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
  83. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
  84. teradataml/data/load_example_data.py +3 -0
  85. teradataml/data/multi_model_classification.csv +401 -0
  86. teradataml/data/multi_model_regression.csv +401 -0
  87. teradataml/data/openml_example.json +63 -0
  88. teradataml/data/scripts/deploy_script.py +65 -0
  89. teradataml/data/scripts/mapper.R +20 -0
  90. teradataml/data/scripts/sklearn/__init__.py +0 -0
  91. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  92. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  93. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  94. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  95. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  96. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  97. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  98. teradataml/data/templates/open_source_ml.json +9 -0
  99. teradataml/data/teradataml_example.json +73 -1
  100. teradataml/data/test_classification.csv +101 -0
  101. teradataml/data/test_prediction.csv +101 -0
  102. teradataml/data/test_regression.csv +101 -0
  103. teradataml/data/train_multiclass.csv +101 -0
  104. teradataml/data/train_regression.csv +101 -0
  105. teradataml/data/train_regression_multiple_labels.csv +101 -0
  106. teradataml/data/wine_data.csv +1600 -0
  107. teradataml/dataframe/copy_to.py +79 -13
  108. teradataml/dataframe/data_transfer.py +8 -0
  109. teradataml/dataframe/dataframe.py +910 -311
  110. teradataml/dataframe/dataframe_utils.py +102 -5
  111. teradataml/dataframe/fastload.py +11 -3
  112. teradataml/dataframe/setop.py +15 -2
  113. teradataml/dataframe/sql.py +3735 -77
  114. teradataml/dataframe/sql_function_parameters.py +56 -5
  115. teradataml/dataframe/vantage_function_types.py +45 -1
  116. teradataml/dataframe/window.py +30 -29
  117. teradataml/dbutils/dbutils.py +18 -1
  118. teradataml/geospatial/geodataframe.py +18 -7
  119. teradataml/geospatial/geodataframecolumn.py +5 -0
  120. teradataml/hyperparameter_tuner/optimizer.py +910 -120
  121. teradataml/hyperparameter_tuner/utils.py +131 -37
  122. teradataml/lib/aed_0_1.dll +0 -0
  123. teradataml/lib/libaed_0_1.dylib +0 -0
  124. teradataml/lib/libaed_0_1.so +0 -0
  125. teradataml/libaed_0_1.dylib +0 -0
  126. teradataml/libaed_0_1.so +0 -0
  127. teradataml/opensource/__init__.py +1 -0
  128. teradataml/opensource/sklearn/__init__.py +1 -0
  129. teradataml/opensource/sklearn/_class.py +255 -0
  130. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  131. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  132. teradataml/opensource/sklearn/constants.py +54 -0
  133. teradataml/options/__init__.py +3 -6
  134. teradataml/options/configure.py +21 -20
  135. teradataml/scriptmgmt/UserEnv.py +61 -5
  136. teradataml/scriptmgmt/lls_utils.py +135 -53
  137. teradataml/table_operators/Apply.py +38 -6
  138. teradataml/table_operators/Script.py +45 -308
  139. teradataml/table_operators/TableOperator.py +182 -591
  140. teradataml/table_operators/__init__.py +0 -1
  141. teradataml/table_operators/table_operator_util.py +32 -40
  142. teradataml/utils/validators.py +127 -3
  143. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
  144. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
  145. teradataml/analytics/mle/AdaBoost.py +0 -651
  146. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  147. teradataml/analytics/mle/Antiselect.py +0 -342
  148. teradataml/analytics/mle/Arima.py +0 -641
  149. teradataml/analytics/mle/ArimaPredict.py +0 -477
  150. teradataml/analytics/mle/Attribution.py +0 -1070
  151. teradataml/analytics/mle/Betweenness.py +0 -658
  152. teradataml/analytics/mle/Burst.py +0 -711
  153. teradataml/analytics/mle/CCM.py +0 -600
  154. teradataml/analytics/mle/CCMPrepare.py +0 -324
  155. teradataml/analytics/mle/CFilter.py +0 -460
  156. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  157. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  158. teradataml/analytics/mle/Closeness.py +0 -737
  159. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  160. teradataml/analytics/mle/Correlation.py +0 -477
  161. teradataml/analytics/mle/Correlation2.py +0 -573
  162. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  163. teradataml/analytics/mle/CoxPH.py +0 -556
  164. teradataml/analytics/mle/CoxSurvival.py +0 -478
  165. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  166. teradataml/analytics/mle/DTW.py +0 -623
  167. teradataml/analytics/mle/DWT.py +0 -564
  168. teradataml/analytics/mle/DWT2D.py +0 -599
  169. teradataml/analytics/mle/DecisionForest.py +0 -716
  170. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  171. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  172. teradataml/analytics/mle/DecisionTree.py +0 -830
  173. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  174. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  175. teradataml/analytics/mle/FMeasure.py +0 -402
  176. teradataml/analytics/mle/FPGrowth.py +0 -734
  177. teradataml/analytics/mle/FrequentPaths.py +0 -695
  178. teradataml/analytics/mle/GLM.py +0 -558
  179. teradataml/analytics/mle/GLML1L2.py +0 -547
  180. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  181. teradataml/analytics/mle/GLMPredict.py +0 -529
  182. teradataml/analytics/mle/HMMDecoder.py +0 -945
  183. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  184. teradataml/analytics/mle/HMMSupervised.py +0 -521
  185. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  186. teradataml/analytics/mle/Histogram.py +0 -561
  187. teradataml/analytics/mle/IDWT.py +0 -476
  188. teradataml/analytics/mle/IDWT2D.py +0 -493
  189. teradataml/analytics/mle/IdentityMatch.py +0 -763
  190. teradataml/analytics/mle/Interpolator.py +0 -918
  191. teradataml/analytics/mle/KMeans.py +0 -485
  192. teradataml/analytics/mle/KNN.py +0 -627
  193. teradataml/analytics/mle/KNNRecommender.py +0 -488
  194. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  195. teradataml/analytics/mle/LAR.py +0 -439
  196. teradataml/analytics/mle/LARPredict.py +0 -478
  197. teradataml/analytics/mle/LDA.py +0 -548
  198. teradataml/analytics/mle/LDAInference.py +0 -492
  199. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  200. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  201. teradataml/analytics/mle/LinReg.py +0 -433
  202. teradataml/analytics/mle/LinRegPredict.py +0 -438
  203. teradataml/analytics/mle/MinHash.py +0 -544
  204. teradataml/analytics/mle/Modularity.py +0 -587
  205. teradataml/analytics/mle/NEREvaluator.py +0 -410
  206. teradataml/analytics/mle/NERExtractor.py +0 -595
  207. teradataml/analytics/mle/NERTrainer.py +0 -458
  208. teradataml/analytics/mle/NGrams.py +0 -570
  209. teradataml/analytics/mle/NPath.py +0 -634
  210. teradataml/analytics/mle/NTree.py +0 -549
  211. teradataml/analytics/mle/NaiveBayes.py +0 -462
  212. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  213. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  214. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  215. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  216. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  217. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  218. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  219. teradataml/analytics/mle/POSTagger.py +0 -417
  220. teradataml/analytics/mle/Pack.py +0 -411
  221. teradataml/analytics/mle/PageRank.py +0 -535
  222. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  223. teradataml/analytics/mle/PathGenerator.py +0 -367
  224. teradataml/analytics/mle/PathStart.py +0 -464
  225. teradataml/analytics/mle/PathSummarizer.py +0 -470
  226. teradataml/analytics/mle/Pivot.py +0 -471
  227. teradataml/analytics/mle/ROC.py +0 -425
  228. teradataml/analytics/mle/RandomSample.py +0 -637
  229. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  230. teradataml/analytics/mle/SAX.py +0 -779
  231. teradataml/analytics/mle/SVMDense.py +0 -677
  232. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  233. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  234. teradataml/analytics/mle/SVMSparse.py +0 -557
  235. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  236. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  237. teradataml/analytics/mle/Sampling.py +0 -549
  238. teradataml/analytics/mle/Scale.py +0 -565
  239. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  240. teradataml/analytics/mle/ScaleMap.py +0 -378
  241. teradataml/analytics/mle/ScaleSummary.py +0 -320
  242. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  243. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  244. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  245. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  246. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  247. teradataml/analytics/mle/Sessionize.py +0 -475
  248. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  249. teradataml/analytics/mle/StringSimilarity.py +0 -425
  250. teradataml/analytics/mle/TF.py +0 -389
  251. teradataml/analytics/mle/TFIDF.py +0 -504
  252. teradataml/analytics/mle/TextChunker.py +0 -414
  253. teradataml/analytics/mle/TextClassifier.py +0 -399
  254. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  255. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  256. teradataml/analytics/mle/TextMorph.py +0 -494
  257. teradataml/analytics/mle/TextParser.py +0 -623
  258. teradataml/analytics/mle/TextTagger.py +0 -530
  259. teradataml/analytics/mle/TextTokenizer.py +0 -502
  260. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  261. teradataml/analytics/mle/Unpack.py +0 -526
  262. teradataml/analytics/mle/Unpivot.py +0 -438
  263. teradataml/analytics/mle/VarMax.py +0 -776
  264. teradataml/analytics/mle/VectorDistance.py +0 -762
  265. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  266. teradataml/analytics/mle/XGBoost.py +0 -842
  267. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  268. teradataml/analytics/mle/__init__.py +0 -123
  269. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  270. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  271. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  272. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  273. teradataml/analytics/mle/json/arima_mle.json +0 -172
  274. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  275. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  276. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  277. teradataml/analytics/mle/json/burst_mle.json +0 -140
  278. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  279. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  280. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  281. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  282. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  283. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  284. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  285. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  286. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  287. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  288. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  289. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  290. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  291. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  292. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  293. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  294. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  295. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  296. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  297. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  298. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  299. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  300. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  301. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  302. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  303. teradataml/analytics/mle/json/glm_mle.json +0 -111
  304. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  305. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  306. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  307. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  308. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  309. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  310. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  311. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  312. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  313. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  314. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  315. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  316. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  317. teradataml/analytics/mle/json/knn_mle.json +0 -141
  318. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  319. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  320. teradataml/analytics/mle/json/lar_mle.json +0 -78
  321. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  322. teradataml/analytics/mle/json/lda_mle.json +0 -130
  323. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  324. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  325. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  326. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  327. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  328. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  329. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  330. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  331. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  332. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  333. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  334. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  335. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  336. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  337. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  338. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  339. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  340. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  341. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  342. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  343. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  344. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  345. teradataml/analytics/mle/json/pack_mle.json +0 -58
  346. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  347. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  348. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  349. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  350. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  351. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  352. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  353. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  354. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  355. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  356. teradataml/analytics/mle/json/roc_mle.json +0 -73
  357. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  358. teradataml/analytics/mle/json/sax_mle.json +0 -154
  359. teradataml/analytics/mle/json/scale_mle.json +0 -93
  360. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  361. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  362. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  363. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  364. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  365. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  366. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  367. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  368. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  369. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  370. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  371. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  372. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  373. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  374. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  375. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  376. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  377. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  378. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  379. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  380. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  381. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  382. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  383. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  384. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  385. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  386. teradataml/analytics/mle/json/tf_mle.json +0 -33
  387. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  388. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  389. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  390. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  391. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  392. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  393. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  394. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  395. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  396. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  397. teradataml/analytics/sqle/Antiselect.py +0 -321
  398. teradataml/analytics/sqle/Attribution.py +0 -603
  399. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  400. teradataml/analytics/sqle/GLMPredict.py +0 -430
  401. teradataml/analytics/sqle/MovingAverage.py +0 -543
  402. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  403. teradataml/analytics/sqle/NPath.py +0 -632
  404. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  405. teradataml/analytics/sqle/Pack.py +0 -388
  406. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  407. teradataml/analytics/sqle/Sessionize.py +0 -390
  408. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  409. teradataml/analytics/sqle/Unpack.py +0 -503
  410. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  411. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  412. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  413. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  414. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  415. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  416. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  417. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  418. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  419. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  420. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  421. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  422. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  423. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  424. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  425. teradataml/catalog/model_cataloging.py +0 -980
  426. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  427. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  428. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  429. teradataml/table_operators/sandbox_container_util.py +0 -643
  430. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
  431. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
  432. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
teradataml/automl/data_transformation.py (new file)
@@ -0,0 +1,727 @@
+ # ##################################################################
+ #
+ # Copyright 2024 Teradata. All rights reserved.
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
+ #
+ # Primary Owner: Sweta Shaw
+ # Email Id: Sweta.Shaw@Teradata.com
+ #
+ # Secondary Owner: Akhil Bisht
+ # Email Id: AKHIL.BISHT@Teradata.com
+ #
+ # Version: 1.1
+ # Function Version: 1.0
+ # ##################################################################
+
+ # Python libraries
+ import pandas as pd
+
+ # Teradata libraries
+ from teradataml.dataframe.dataframe import DataFrame
+ from teradataml.dataframe.copy_to import copy_to_sql
+ from teradataml import Antiselect
+ from teradataml import BincodeTransform
+ from teradataml import ConvertTo
+ from teradataml import FillRowId
+ from teradataml import NonLinearCombineTransform
+ from teradataml import OneHotEncodingTransform
+ from teradataml import OrdinalEncodingTransform
+ from teradataml import RoundColumns
+ from teradataml import ScaleTransform
+ from teradataml import SimpleImputeTransform
+ from teradataml import TargetEncodingTransform
+ from teradataml import Transform, UtilFuncs, TeradataConstants
+
+ # AutoML Internal libraries
+ from teradataml.automl.feature_exploration import _FeatureExplore
+ from teradataml.automl.feature_engineering import _FeatureEngineering
+
+
+ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
+
+     def __init__(self,
+                  data,
+                  data_transformation_params,
+                  auto=True,
+                  verbose=0,
+                  target_column_ind=False,
+                  table_name_mapping={}):
+         """
+         DESCRIPTION:
+             Function initializes the data, data transformation object and running mode
+             for data transformation.
+
+         PARAMETERS:
+             data:
+                 Required Argument.
+                 Specifies the input teradataml DataFrame for data transformation phase.
+                 Types: teradataml DataFrame
+
+             data_transformation_params:
+                 Required Argument.
+                 Specifies the parameters for performing data transformation.
+                 Types: dict
+
+             auto:
+                 Optional Argument.
+                 Specifies whether to run AutoML in custom mode or auto mode.
+                 When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
+                 Default Value: True
+                 Types: bool
+
+             verbose:
+                 Optional Argument.
+                 Specifies the detailed execution steps based on verbose level.
+                 Default Value: 0
+                 Permitted Values:
+                     * 0: prints no details about data transformation.
+                     * 1: prints the execution steps of data transformation.
+                     * 2: prints the intermediate data between each step of data transformation.
+                 Types: int
+
+             target_column_ind:
+                 Optional Argument.
+                 Specifies whether the target column is present in the given dataset.
+                 Default Value: False
+                 Types: bool
+         """
+         self.data = data
+         self.data_transformation_params = data_transformation_params
+         self.auto = auto
+         self.verbose = verbose
+         self.target_column_ind = target_column_ind
+         self.table_name_mapping = table_name_mapping
+
+     def data_transformation(self):
+         """
+         DESCRIPTION:
+             Function to perform the following tasks:
+                 1. Performs transformation carried out in feature engineering phase on the test data.
+                 2. Performs transformation carried out in data preparation phase on the test data.
+
+         RETURNS:
+             Table name mapping for the transformed data.
+         """
+         # Initializing Feature Exploration
+         _FeatureExplore.__init__(self,
+                                  data=self.data,
+                                  target_column=None,
+                                  verbose=self.verbose)
+         # Initializing Feature Engineering
+         _FeatureEngineering.__init__(self,
+                                      data=self.data,
+                                      target_column=None,
+                                      model_list=None,
+                                      verbose=self.verbose)
+
+         self._display_msg(msg="Data Transformation started ...", show_data=True)
+         # Extracting target column details and whether the problem type is classification
+         self.data_target_column = self.data_transformation_params.get("data_target_column")
+         self.classification_type = self.data_transformation_params.get("classification_type", False)
+         # Performing transformation carried out in feature engineering phase
+         self.feature_engineering_transformation()
+         # Performing transformation carried out in data preparation phase
+         self.data_preparation_transformation()
+         self._display_msg(msg="Data Transformation completed.", show_data=True)
+
+         return self.table_name_mapping
+
+     def feature_engineering_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs transformation carried out in feature engineering phase
+             on test data using parameters from data_transformation_params.
+         """
+         self._display_msg(msg="Performing transformation carried out in feature engineering phase ...",
+                           show_data=True)
+         # Performing default transformation for both auto and custom mode
+         self._preprocess_transformation()
+         self._futile_column_handling_transformation()
+         # Handling target column transformation
+         if self.target_column_ind and self.classification_type:
+             self._handle_target_column_transformation()
+         self._date_column_handling_transformation()
+
+         # Performing transformation according to run mode
+         if self.auto:
+             self._missing_value_handling_transformation()
+             self._categorical_encoding_transformation()
+         else:
+             self._custom_missing_value_handling_transformation()
+             self._custom_bincode_column_transformation()
+             self._custom_string_column_transformation()
+             self._custom_categorical_encoding_transformation()
+             self._custom_mathematical_transformation()
+             self._custom_non_linear_transformation()
+             self._custom_anti_select_column_transformation()
+
+     def data_preparation_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs transformation carried out in data preparation phase
+             on test data using parameters from data_transformation_params.
+         """
+         self._display_msg(msg="Performing transformation carried out in data preparation phase ...",
+                           show_data=True)
+         # Handling features generated in the feature engineering phase
+         self._handle_generated_features_transformation()
+
+         # Performing transformation including feature selection using lasso, rfe and pca
+         # followed by scaling
+         self._feature_selection_lasso_transformation()
+         self._feature_selection_rfe_transformation()
+         self._feature_selection_pca_transformation()
+
+     def _preprocess_transformation(self):
+         """
+         DESCRIPTION:
+             Function drops irrelevant columns and adds an id column.
+         """
+         # Extracting irrelevant column list
+         columns_to_be_removed = self.data_transformation_params.get("drop_irrelevent_columns", None)
+         if columns_to_be_removed:
+             self.data = self.data.drop(columns_to_be_removed, axis=1)
+             self._display_msg(msg="\nUpdated dataset after dropping irrelevent columns :",
+                               data=self.data)
+
+         # Adding id column
+         self.data = FillRowId(data=self.data, row_id_column='id').result
+
+     def _futile_column_handling_transformation(self):
+         """
+         DESCRIPTION:
+             Function drops futile columns from the dataset.
+         """
+         # Extracting futile column list
+         futile_cols = self.data_transformation_params.get("futile_columns", None)
+         if futile_cols:
+             self.data = self.data.drop(futile_cols, axis=1)
+             self._display_msg(msg="\nUpdated dataset after dropping futile columns :",
+                               data=self.data)
+
+     def _date_column_handling_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs transformation on date columns and generates new columns.
+         """
+         # Extracting date columns
+         date_columns = self.data_transformation_params.get("date_columns", None)
+         if date_columns:
+             # Dropping rows with null values in date columns
+             self.data = self.data.dropna(subset=date_columns)
+             # Extracting unique date columns for dropping
+             drop_unique_date_columns = self.data_transformation_params.get("drop_unique_date_columns", None)
+             if drop_unique_date_columns:
+                 self.data = self.data.drop(drop_unique_date_columns, axis=1)
+
+             # Extracting date components parameters for new columns generation
+             extract_date_comp_param = self.data_transformation_params.get("extract_date_comp_param", None)
+             extract_date_comp_col = self.data_transformation_params.get("extract_date_comp_col", None)
+             if extract_date_comp_param:
+                 self.data = self.data.assign(**extract_date_comp_param)
+                 self.data = self.data.drop(extract_date_comp_col, axis=1)
+
+             # Extracting irrelevant date component columns for dropping
+             drop_extract_date_columns = self.data_transformation_params.get("drop_extract_date_columns", None)
+             if drop_extract_date_columns:
+                 self.data = self.data.drop(drop_extract_date_columns, axis=1)
+
+             # Extracting date component fit objects for bincode transformation
+             day_component_fit_object = self.data_transformation_params.get("day_component_fit_object", None)
+             month_component_fit_object = self.data_transformation_params.get("month_component_fit_object", None)
+             year_diff_component_fit_object = self.data_transformation_params.get("year_diff_component_fit_object", None)
+
+             # Performing bincode transformation on day, month and year components
+             for fit_object in [day_component_fit_object, month_component_fit_object, year_diff_component_fit_object]:
+                 if fit_object:
+                     for col, bin_code_fit in fit_object.items():
+                         accumulate_columns = self._extract_list(self.data.columns, [col])
+                         transform_params = {
+                             "data": self.data,
+                             "object": bin_code_fit,
+                             "accumulate": accumulate_columns,
+                             "persist": True
+                         }
+                         self.data = BincodeTransform(**transform_params).result
+
+             self._display_msg(msg="\nUpdated dataset after transforming date columns :",
+                               data=self.data)
+
+     def _missing_value_handling_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs missing value handling by dropping columns and imputing columns.
+         """
+         # Extracting missing value containing columns to be dropped
+         drop_cols = self.data_transformation_params.get("drop_missing_columns", None)
+         if drop_cols:
+             self.data = self.data.drop(drop_cols, axis=1)
+             self._display_msg(msg="\nUpdated dataset after dropping missing value containing columns : ",
+                               data=self.data)
+
+         # Extracting imputation columns and fit object for missing value imputation
+         imputation_cols = self.data_transformation_params.get("imputation_columns", None)
+         if imputation_cols:
+             sm_fit_obj = self.data_transformation_params.get("imputation_fit_object")
+             # Imputing columns using fit object
+             self.data = SimpleImputeTransform(data=self.data,
+                                               object=sm_fit_obj,
+                                               volatile=True).result
+             self._display_msg(msg="\nUpdated dataset after imputing missing value containing columns :",
+                               data=self.data)
+
+         # Handling remaining nulls; this is a temporary solution and is subject to change based on input.
+         dropped_data = self.data.dropna()
+         dropped_count = self.data.shape[0] - dropped_data.shape[0]
+         if dropped_count > 0:
+             self.data = dropped_data
+             self._display_msg(msg="\nFound additional {} rows that contain missing values :".format(dropped_count),
+                               data=self.data)
+             self._display_msg(msg="\nUpdated dataset after dropping additional missing value containing rows :",
+                               data=self.data)
+
+     def _custom_missing_value_handling_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs missing value handling by dropping columns and imputing
+             columns based on user input.
+         """
+         # Extracting custom missing value containing columns to be dropped
+         drop_col_list = self.data_transformation_params.get("custom_drop_missing_columns", None)
+         if drop_col_list:
+             self.data = self.data.drop(drop_col_list, axis=1)
+             self._display_msg(msg="\nUpdated dataset after dropping customized missing value containing columns :",
+                               data=self.data)
+
+         # Extracting custom imputation columns and fit object for missing value imputation
+         custom_imp_ind = self.data_transformation_params.get("custom_imputation_ind", False)
+         if custom_imp_ind:
+             sm_fit_obj = self.data_transformation_params.get("custom_imputation_fit_object")
+             # Imputing columns using fit object
+             self.data = SimpleImputeTransform(data=self.data,
+                                               object=sm_fit_obj,
+                                               volatile=True).result
+             self._display_msg(msg="\nUpdated dataset after imputing customized missing value containing columns :",
+                               data=self.data)
+         # Handling rest with default missing value handling
+         self._missing_value_handling_transformation()
+
+     def _custom_bincode_column_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs bincode transformation on columns based on user input.
+         """
+         # Extracting custom bincode columns and fit object for bincode transformation
+         custom_bincode_ind = self.data_transformation_params.get("custom_bincode_ind", False)
+         if custom_bincode_ind:
+             # Handling bincode transformation for Equal-Width
+             custom_eql_bincode_col = self.data_transformation_params.get("custom_eql_bincode_col", None)
+             custom_eql_bincode_fit_object = self.data_transformation_params.get("custom_eql_bincode_fit_object", None)
+             if custom_eql_bincode_col:
+                 # Extracting accumulate columns
+                 accumulate_columns = self._extract_list(self.data.columns, custom_eql_bincode_col)
+                 # Adding transform parameters for performing binning with Equal-Width.
+                 eql_transform_params = {
+                     "data": self.data,
+                     "object": custom_eql_bincode_fit_object,
+                     "accumulate": accumulate_columns,
+                     "persist": True,
+                 }
+                 self.data = BincodeTransform(**eql_transform_params).result
+                 self._display_msg(msg="\nUpdated dataset after performing customized equal width bin-code transformation :",
+                                   data=self.data)
+
+             # Handling bincode transformation for Variable-Width
+             custom_var_bincode_col = self.data_transformation_params.get("custom_var_bincode_col", None)
+             custom_var_bincode_fit_object = self.data_transformation_params.get("custom_var_bincode_fit_object", None)
+             if custom_var_bincode_col:
+                 # Extracting accumulate columns
+                 accumulate_columns = self._extract_list(self.data.columns, custom_var_bincode_col)
+                 # Adding transform parameters for performing binning with Variable-Width.
+                 var_transform_params = {
+                     "data": self.data,
+                     "object": custom_var_bincode_fit_object,
+                     "object_order_column": "TD_MinValue_BINFIT",
+                     "accumulate": accumulate_columns,
+                     "persist": True
+                 }
+                 self.data = BincodeTransform(**var_transform_params).result
+                 self._display_msg(msg="\nUpdated dataset after performing customized variable width bin-code transformation :",
+                                   data=self.data)
+
+     def _custom_string_column_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs string column transformation on categorical columns based on user input.
+         """
+         # Extracting custom string manipulation columns and fit object for performing string manipulation
+         custom_string_manipulation_ind = self.data_transformation_params.get("custom_string_manipulation_ind", False)
+         if custom_string_manipulation_ind:
+             custom_string_manipulation_param = self.data_transformation_params.get('custom_string_manipulation_param', None)
+             # Performing string manipulation for each column
+             for target_col, transform_val in custom_string_manipulation_param.items():
+                 self.data = self._str_method_mapping(target_col, transform_val)
+             self._display_msg(msg="\nUpdated dataset after performing customized string manipulation :",
+                               data=self.data)
+
+     def _categorical_encoding_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs default encoding transformation, i.e., one-hot, on categorical columns.
+         """
+         # Extracting one hot encoding parameters for performing encoding
+         one_hot_encoding_ind = self.data_transformation_params.get("one_hot_encoding_ind", False)
+         one_hot_encoding_fit_obj = self.data_transformation_params.get("one_hot_encoding_fit_obj", None)
+         one_hot_encoding_drop_list = self.data_transformation_params.get("one_hot_encoding_drop_list", None)
+         if one_hot_encoding_ind:
+             # Adding transform parameters for performing encoding
+             for fit_obj in one_hot_encoding_fit_obj.values():
+                 transform_params = {
+                     "data": self.data,
+                     "object": fit_obj,
+                     "is_input_dense": True,
+                     "persist": True
+                 }
+                 # Performing one hot encoding transformation
+                 self.data = OneHotEncodingTransform(**transform_params).result
+             # Dropping old columns after encoding
+             self.data = self.data.drop(one_hot_encoding_drop_list, axis=1)
+             self._display_msg(msg="\nUpdated dataset after performing categorical encoding :",
+                               data=self.data)
+
+     def _custom_categorical_encoding_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs custom encoding transformation on categorical columns based on user input.
+         """
+         # Extracting custom encoding parameters for performing encoding
+         custom_categorical_encoding_ind = self.data_transformation_params.get("custom_categorical_encoding_ind", False)
+         if custom_categorical_encoding_ind:
+             # Extracting parameters for ordinal encoding
+             custom_ord_encoding_fit_obj = self.data_transformation_params.get("custom_ord_encoding_fit_obj", None)
+             custom_ord_encoding_col = self.data_transformation_params.get("custom_ord_encoding_col", None)
+             if custom_ord_encoding_col:
+                 # Extracting accumulate columns
+                 accumulate_columns = self._extract_list(self.data.columns, custom_ord_encoding_col)
+                 # Adding transform parameters for performing encoding
+                 transform_params = {
+                     "data": self.data,
+                     "object": custom_ord_encoding_fit_obj,
+                     "accumulate": accumulate_columns,
+                     "persist": True
+                 }
+                 # Performing ordinal encoding transformation
+                 self.data = OrdinalEncodingTransform(**transform_params).result
+             # Extracting parameters for target encoding
+             custom_target_encoding_ind = self.data_transformation_params.get("custom_target_encoding_ind", False)
+             custom_target_encoding_fit_obj = self.data_transformation_params.get("custom_target_encoding_fit_obj", None)
+             if custom_target_encoding_ind:
+                 for col, tar_fit_obj in custom_target_encoding_fit_obj.items():
+                     # Extracting accumulate columns
+                     accumulate_columns = self._extract_list(self.data.columns, [col])
+                     # Adding transform parameters for performing encoding
+                     transform_params = {
+                         "data": self.data,
+                         "object": tar_fit_obj,
+                         "accumulate": accumulate_columns,
+                         "persist": True
+                     }
+                     # Performing target encoding transformation
+                     self.data = TargetEncodingTransform(**transform_params).result
+             self._display_msg(msg="\nUpdated dataset after performing customized categorical encoding :",
+                               data=self.data)
+
+         # Handling rest with default categorical encoding transformation
+         self._categorical_encoding_transformation()
+
+     def _custom_mathematical_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs custom mathematical transformation on numerical columns based on user input.
+         """
+         # Extracting custom mathematical transformation parameters for performing transformation
+         custom_mathematical_transformation_ind = self.data_transformation_params.get("custom_mathematical_transformation_ind", False)
+         if custom_mathematical_transformation_ind:
+             # Extracting parameters for performing numapply transformation
+             custom_numapply_transformation_param = self.data_transformation_params.get("custom_numapply_transformation_param", None)
+             # Checking if numapply transformation param is present
+             if custom_numapply_transformation_param:
+                 # Performing transformation for each column
+                 for col, transform_val in custom_numapply_transformation_param.items():
+                     self.data = self._numapply_transformation(col, transform_val)
+
+             # Extracting parameters for performing numerical transformation
+             custom_numerical_transformation_fit_object = self.data_transformation_params.get("custom_numerical_transformation_fit_object", None)
+             # Checking if numerical transformation fit object is present
+             if custom_numerical_transformation_fit_object:
+                 # Extracting id columns for performing transformation
+                 custom_numerical_transformation_id_columns = self.data_transformation_params.get("custom_numerical_transformation_id_columns", None)
+                 # Checking for target column presence and handling id columns accordingly
+                 if not self.target_column_ind and \
+                         self.data_target_column in custom_numerical_transformation_id_columns:
+                     custom_numerical_transformation_id_columns = self._extract_list(
+                         custom_numerical_transformation_id_columns,
+                         [self.data_target_column])
+
+                 # Adding transform parameters for transformation
+                 transform_params = {
+                     "data": self.data,
+                     "object": custom_numerical_transformation_fit_object,
+                     "id_columns": custom_numerical_transformation_id_columns,
+                     "persist": True
+                 }
+                 # Performing transformation on target columns
+                 self.data = Transform(**transform_params).result
+                 self._display_msg(msg="\nUpdated dataset after performing customized mathematical transformation :",
+                                   data=self.data)
+
+     def _custom_non_linear_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs custom non-linear transformation on numerical columns based on user input.
+         """
+         # Extracting custom non-linear transformation parameters for performing transformation
+         custom_non_linear_transformation_ind = self.data_transformation_params.get("custom_non_linear_transformation_ind", False)
+         if custom_non_linear_transformation_ind:
+             # Extracting fit object for non-linear transformation
+             fit_obj_list = self.data_transformation_params['custom_non_linear_transformation_fit_object']
+             for comb, fit_obj in fit_obj_list.items():
+                 # Adding transform params for transformation
+                 transform_params = {
+                     "data": self.data,
+                     "object": fit_obj,
+                     "accumulate": self.data.columns,
+                     "persist": True
+                 }
+                 # Performing transformation
+                 self.data = NonLinearCombineTransform(**transform_params).result
+             self._display_msg(msg="\nUpdated dataset after performing customized non-linear transformation :",
+                               data=self.data)
+
+     def _custom_anti_select_column_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs custom anti-select transformation on columns based on user input.
+         """
+         # Extracting custom anti-select transformation parameters for performing transformation
+         custom_anti_select_columns_ind = self.data_transformation_params.get("custom_anti_select_columns_ind", False)
+         if custom_anti_select_columns_ind:
+             # Extracting anti-select column list
+             anti_select_list = self.data_transformation_params.get("custom_anti_select_columns", None)
+             if anti_select_list:
+                 fit_params = {
+                     "data": self.data,
+                     "exclude": anti_select_list
+                 }
+                 # Performing transformation for given user input
+                 self.data = Antiselect(**fit_params).result
+                 self._display_msg(msg="\nUpdated dataset after performing customized anti-selection :",
+                                   data=self.data)
+
+     def _handle_generated_features_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs rounding transformation on generated features
+             from feature engineering phase.
+         """
+         # Extracting list of columns to be rounded
+         round_columns = self.data_transformation_params.get("round_columns", None)
+         if round_columns:
+             # Checking for target column presence and handling list accordingly
+             if not self.target_column_ind and self.data_target_column in round_columns:
+                 round_columns = self._extract_list(round_columns, [self.data_target_column])
+
+             # Extracting accumulate columns
+             accumulate_columns = self._extract_list(self.data.columns, round_columns)
+             # Performing rounding on target columns up to 4 precision digits
+             fit_params = {
+                 "data": self.data,
+                 "target_columns": round_columns,
+                 "precision_digit": 4,
+                 "accumulate": accumulate_columns,
+                 "persist": True}
+             self.data = RoundColumns(**fit_params).result
+
+     def _handle_target_column_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs encoding and datatype transformation on target column
+             for classification problem.
+         """
+         # Fetching target column encoding indicator and fit object
+         target_col_encode_ind = self.data_transformation_params.get("target_col_encode_ind", False)
+         if target_col_encode_ind:
+             # Extracting ordinal encoding fit object for target column
+             target_col_ord_encoding_fit_obj = self.data_transformation_params.get("target_col_ord_encoding_fit_obj", None)
+             if target_col_ord_encoding_fit_obj:
+                 # Extracting accumulate columns
+                 accumulate_columns = self._extract_list(self.data.columns, [self.data_target_column])
+                 # Adding transform parameters for performing encoding
+                 transform_params = {
+                     "data": self.data,
+                     "object": target_col_ord_encoding_fit_obj,
+                     "accumulate": accumulate_columns,
+                     "persist": True
+                 }
+                 # Performing ordinal encoding transformation
+                 self.data = OrdinalEncodingTransform(**transform_params).result
+
+         # Converting target column to integer datatype
+         params = {
+             "data": self.data,
+             "target_columns": [self.data_target_column],
+             "target_datatype": ["integer"],
+             "accumulate": self._extract_list(self.data.columns, [self.data_target_column])
+         }
+         self.data = ConvertTo(**params).result
+         self._display_msg(msg="\nUpdated dataset after performing target column transformation :",
+                           data=self.data)
+
+     def _extract_and_display_features(self, feature_type, feature_list):
+         """
+         DESCRIPTION:
+             Function performs extraction of features using feature_list and target column indicator.
+
+         PARAMETERS:
+             feature_type:
+                 Required Argument.
+                 Specifies the type of feature selection.
+                 Types: str
+
+             feature_list:
+                 Required Argument.
+                 Specifies the list of features to be selected.
+                 Types: list
+
+         RETURNS:
+             teradataml DataFrame with selected features.
+         """
+         # Checking for target column presence and handling list accordingly
+         if not self.target_column_ind and self.data_target_column in feature_list:
+             feature_list = self._extract_list(feature_list, [self.data_target_column])
+
+         # Creating dataframe with selected features
+         feature_df = self.data[feature_list]
+
+         # Displaying feature dataframe
+         self._display_msg(msg=f"\nUpdated dataset after performing {feature_type} feature selection:",
+                           data=feature_df)
+
+         # Returning feature dataframe
+         return feature_df
+
+     def _feature_selection_lasso_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs feature selection using lasso followed by scaling.
+         """
+         # Extracting features selected by lasso in data preparation phase
+         lasso_features = self.data_transformation_params.get("lasso_features", None)
+         lasso_df = self._extract_and_display_features("Lasso", lasso_features)
+
+         # Performing feature scaling
+         # Extracting fit object and columns for scaling
+         lasso_scale_fit_obj = self.data_transformation_params.get("lasso_scale_fit_obj", None)
+         lasso_scale_col = self.data_transformation_params.get("lasso_scale_col", None)
+         # Extracting accumulate columns
+         accumulate_cols = self._extract_list(lasso_df.columns, lasso_scale_col)
+         # Scaling dataset
+         lasso_df = ScaleTransform(data=lasso_df,
+                                   object=lasso_scale_fit_obj,
+                                   accumulate=accumulate_cols).result
+         # Displaying scaled dataset
+         self._display_msg(msg="\nUpdated dataset after performing scaling on Lasso selected features :",
+                           data=lasso_df)
+
+         # Uploading lasso dataset to table for further use
+         table_name = UtilFuncs._generate_temp_table_name(prefix="lasso_new_test",
+                                                          table_type=TeradataConstants.TERADATA_TABLE)
+         self.table_name_mapping["lasso_new_test"] = table_name
+         copy_to_sql(df=lasso_df, table_name=table_name, if_exists="replace")
+
+     def _feature_selection_rfe_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs feature selection using rfe followed by scaling.
+         """
+         # Extracting features selected by rfe in data preparation phase
+         rfe_features = self.data_transformation_params.get("rfe_features", None)
+         rfe_df = self._extract_and_display_features("RFE", rfe_features)
+
+         # Renaming rfe columns
+         rfe_rename_column = self.data_transformation_params.get("rfe_rename_column", None)
+         if rfe_rename_column:
+             new_col_name = {f'r_{col}': rfe_df[col] for col in rfe_rename_column}
+             rfe_df = rfe_df.assign(drop_columns=False, **new_col_name)
+             rfe_df = rfe_df.drop(rfe_rename_column, axis=1)
+
+         # Performing feature scaling
+         # Extracting fit object and columns for scaling
+         rfe_scale_fit_obj = self.data_transformation_params.get("rfe_scale_fit_obj", None)
+         rfe_scale_col = self.data_transformation_params.get("rfe_scale_col", None)
+         # Extracting accumulate columns
+         accumulate_cols = self._extract_list(rfe_df.columns, rfe_scale_col)
+         # Scaling rfe dataset
+         rfe_df = ScaleTransform(data=rfe_df,
+                                 object=rfe_scale_fit_obj,
+                                 accumulate=accumulate_cols).result
+         # Displaying scaled dataset
+         self._display_msg(msg="\nUpdated dataset after performing scaling on RFE selected features :",
+                           data=rfe_df)
+
+         # Uploading rfe dataset to table for further use
+         table_name = UtilFuncs._generate_temp_table_name(prefix="rfe_new_test",
+                                                          table_type=TeradataConstants.TERADATA_TABLE)
+         self.table_name_mapping["rfe_new_test"] = table_name
+         copy_to_sql(df=rfe_df, table_name=table_name, if_exists="replace")
+
+     def _feature_selection_pca_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs feature scaling followed by feature selection using pca.
+         """
+         # Extracting fit object and column details for performing feature scaling
+         pca_scale_fit_obj = self.data_transformation_params.get("pca_scale_fit_obj", None)
+         pca_scale_col = self.data_transformation_params.get("pca_scale_col", None)
+         # Extracting accumulate columns
+         accumulate_cols = self._extract_list(self.data.columns, pca_scale_col)
+         # Scaling dataset for pca
+         pca_scaled_df = ScaleTransform(data=self.data,
+                                        object=pca_scale_fit_obj,
+                                        accumulate=accumulate_cols).result
+         # Displaying scaled dataset
+         self._display_msg(msg="\nUpdated dataset after performing scaling for PCA feature selection :",
+                           data=pca_scaled_df)
+
+         # Converting to pandas dataframe for applying pca
+         pca_scaled_pd = pca_scaled_df.to_pandas()
+         # Extracting pca fit instance for applying pca
+         pca_fit_instance = self.data_transformation_params.get("pca_fit_instance", None)
+
+         # Dropping id column and target column if present
+         drop_col = ['id']
+         if self.target_column_ind:
+             drop_col.append(self.data_target_column)
+         pca_df = pca_scaled_pd.drop(columns=drop_col, axis=1)
+
+         # Applying pca on scaled dataset
+         pca_df = pca_fit_instance.transform(pca_df)
+         # Converting to pandas dataframe
+         pca_df = pd.DataFrame(pca_df)
+         # Renaming pca columns
+         pca_new_column = self.data_transformation_params.get("pca_new_column", None)
+         pca_df.rename(columns=pca_new_column, inplace=True)
+         # Adding id column to pca dataframe
+         pca_df = pd.concat([pca_scaled_pd.reset_index(drop=True)['id'], pca_df.reset_index(drop=True)], axis=1)
+         # Adding target column to pca dataframe if present
+         if self.target_column_ind:
+             pca_df[self.data_target_column] = pca_scaled_pd[self.data_target_column].reset_index(drop=True)
+         # Displaying pca dataframe
+         self._display_msg(msg="\nUpdated dataset after performing PCA feature selection :",
+                           data=pca_df)
+
+         # Uploading pca dataset to table for further use
+         table_name = UtilFuncs._generate_temp_table_name(prefix="pca_new_test",
+                                                          table_type=TeradataConstants.TERADATA_TABLE)
+         self.table_name_mapping["pca_new_test"] = table_name
+         copy_to_sql(df=pca_df, table_name=table_name, if_exists="replace")
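
For orientation, here is a minimal, hypothetical sketch of how this new class appears to be driven. _DataTransformation is private (note the leading underscore) and is normally invoked by AutoML's prediction flow rather than by user code; the connection details, table name, and the handful of keys shown in data_transformation_params below are illustrative stand-ins, and a real invocation would also carry the fit objects (imputation, bincode, encoding, scaling) and the lasso/rfe/pca feature lists captured during AutoML's fit phase.

# Hypothetical driver sketch; not part of the released diff above.
# All table names, credentials, and parameter values are illustrative.
from teradataml import create_context, DataFrame
from teradataml.automl.data_transformation import _DataTransformation

# Connect to Vantage (placeholder credentials).
create_context(host="<host>", username="<user>", password="<password>")

# Test data to transform with the objects captured during training.
test_data = DataFrame("churn_test")  # hypothetical table

# Subset of the keys read via .get() in the class above; the full dict is
# produced by the fit phase and also carries the fit objects and the
# lasso/rfe/pca feature lists required by the feature selection steps.
params = {
    "data_target_column": "churn",
    "classification_type": True,
    "drop_irrelevent_columns": ["customer_name"],  # key spelling as in the source
    "futile_columns": ["ticket_no"],
}

transformer = _DataTransformation(data=test_data,
                                  data_transformation_params=params,
                                  auto=True,
                                  verbose=1,
                                  target_column_ind=False)

# Runs the feature engineering and data preparation transformations and
# returns a dict mapping "lasso_new_test", "rfe_new_test" and "pca_new_test"
# to the persisted result tables.
table_map = transformer.data_transformation()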