teradataml 17.20.0.7 → 20.0.0.0 (py3-none-any.whl)

This diff compares publicly available package versions as released to their public registries, and is provided for informational purposes only.

Potentially problematic release: this version of teradataml has been flagged as potentially problematic.

Files changed (1285)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1864 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2013 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +804 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1628 -0 (new AutoML package; see the sketch after this list)
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +993 -0
  31. teradataml/automl/data_transformation.py +727 -0
  32. teradataml/automl/feature_engineering.py +1648 -0
  33. teradataml/automl/feature_exploration.py +547 -0
  34. teradataml/automl/model_evaluation.py +163 -0
  35. teradataml/automl/model_training.py +887 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/pkce_client.py +481 -481
  41. teradataml/common/aed_utils.py +6 -2
  42. teradataml/common/bulk_exposed_utils.py +111 -111
  43. teradataml/common/constants.py +1433 -1441
  44. teradataml/common/deprecations.py +160 -0
  45. teradataml/common/exceptions.py +73 -73
  46. teradataml/common/formula.py +742 -742
  47. teradataml/common/garbagecollector.py +592 -635
  48. teradataml/common/messagecodes.py +422 -431
  49. teradataml/common/messages.py +227 -231
  50. teradataml/common/sqlbundle.py +693 -693
  51. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  52. teradataml/common/utils.py +2418 -2500
  53. teradataml/common/warnings.py +25 -25
  54. teradataml/common/wrapper_utils.py +1 -110
  55. teradataml/config/dummy_file1.cfg +4 -4
  56. teradataml/config/dummy_file2.cfg +2 -2
  57. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  58. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  59. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  60. teradataml/context/aed_context.py +217 -217
  61. teradataml/context/context.py +1071 -999
  62. teradataml/data/A_loan.csv +19 -19
  63. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  64. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  65. teradataml/data/B_loan.csv +49 -49
  66. teradataml/data/BuoyData2.csv +17 -17
  67. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  68. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  69. teradataml/data/Convolve2RealsLeft.csv +5 -5
  70. teradataml/data/Convolve2RealsRight.csv +5 -5
  71. teradataml/data/Convolve2ValidLeft.csv +11 -11
  72. teradataml/data/Convolve2ValidRight.csv +11 -11
  73. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  74. teradataml/data/Orders1_12mf.csv +24 -24
  75. teradataml/data/Pi_loan.csv +7 -7
  76. teradataml/data/SMOOTHED_DATA.csv +7 -7
  77. teradataml/data/TestDFFT8.csv +9 -9
  78. teradataml/data/TestRiver.csv +109 -109
  79. teradataml/data/Traindata.csv +28 -28
  80. teradataml/data/acf.csv +17 -17
  81. teradataml/data/adaboost_example.json +34 -34
  82. teradataml/data/adaboostpredict_example.json +24 -24
  83. teradataml/data/additional_table.csv +10 -10
  84. teradataml/data/admissions_test.csv +21 -21
  85. teradataml/data/admissions_train.csv +41 -41
  86. teradataml/data/admissions_train_nulls.csv +41 -41
  87. teradataml/data/ageandheight.csv +13 -13
  88. teradataml/data/ageandpressure.csv +31 -31
  89. teradataml/data/antiselect_example.json +36 -36
  90. teradataml/data/antiselect_input.csv +8 -8
  91. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  92. teradataml/data/applicant_external.csv +6 -6
  93. teradataml/data/applicant_reference.csv +6 -6
  94. teradataml/data/arima_example.json +9 -9
  95. teradataml/data/assortedtext_input.csv +8 -8
  96. teradataml/data/attribution_example.json +33 -33
  97. teradataml/data/attribution_sample_table.csv +27 -27
  98. teradataml/data/attribution_sample_table1.csv +6 -6
  99. teradataml/data/attribution_sample_table2.csv +11 -11
  100. teradataml/data/bank_churn.csv +10001 -0
  101. teradataml/data/bank_web_clicks1.csv +42 -42
  102. teradataml/data/bank_web_clicks2.csv +91 -91
  103. teradataml/data/bank_web_url.csv +85 -85
  104. teradataml/data/barrier.csv +2 -2
  105. teradataml/data/barrier_new.csv +3 -3
  106. teradataml/data/betweenness_example.json +13 -13
  107. teradataml/data/bin_breaks.csv +8 -8
  108. teradataml/data/bin_fit_ip.csv +3 -3
  109. teradataml/data/binary_complex_left.csv +11 -11
  110. teradataml/data/binary_complex_right.csv +11 -11
  111. teradataml/data/binary_matrix_complex_left.csv +21 -21
  112. teradataml/data/binary_matrix_complex_right.csv +21 -21
  113. teradataml/data/binary_matrix_real_left.csv +21 -21
  114. teradataml/data/binary_matrix_real_right.csv +21 -21
  115. teradataml/data/blood2ageandweight.csv +26 -26
  116. teradataml/data/bmi.csv +501 -0
  117. teradataml/data/boston.csv +507 -507
  118. teradataml/data/buoydata_mix.csv +11 -11
  119. teradataml/data/burst_data.csv +5 -5
  120. teradataml/data/burst_example.json +20 -20
  121. teradataml/data/byom_example.json +17 -17
  122. teradataml/data/bytes_table.csv +3 -3
  123. teradataml/data/cal_housing_ex_raw.csv +70 -70
  124. teradataml/data/callers.csv +7 -7
  125. teradataml/data/calls.csv +10 -10
  126. teradataml/data/cars_hist.csv +33 -33
  127. teradataml/data/cat_table.csv +24 -24
  128. teradataml/data/ccm_example.json +31 -31
  129. teradataml/data/ccm_input.csv +91 -91
  130. teradataml/data/ccm_input2.csv +13 -13
  131. teradataml/data/ccmexample.csv +101 -101
  132. teradataml/data/ccmprepare_example.json +8 -8
  133. teradataml/data/ccmprepare_input.csv +91 -91
  134. teradataml/data/cfilter_example.json +12 -12
  135. teradataml/data/changepointdetection_example.json +18 -18
  136. teradataml/data/changepointdetectionrt_example.json +8 -8
  137. teradataml/data/chi_sq.csv +2 -2
  138. teradataml/data/churn_data.csv +14 -14
  139. teradataml/data/churn_emission.csv +35 -35
  140. teradataml/data/churn_initial.csv +3 -3
  141. teradataml/data/churn_state_transition.csv +5 -5
  142. teradataml/data/citedges_2.csv +745 -745
  143. teradataml/data/citvertices_2.csv +1210 -1210
  144. teradataml/data/clicks2.csv +16 -16
  145. teradataml/data/clickstream.csv +12 -12
  146. teradataml/data/clickstream1.csv +11 -11
  147. teradataml/data/closeness_example.json +15 -15
  148. teradataml/data/complaints.csv +21 -21
  149. teradataml/data/complaints_mini.csv +3 -3
  150. teradataml/data/complaints_testtoken.csv +224 -224
  151. teradataml/data/complaints_tokens_test.csv +353 -353
  152. teradataml/data/complaints_traintoken.csv +472 -472
  153. teradataml/data/computers_category.csv +1001 -1001
  154. teradataml/data/computers_test1.csv +1252 -1252
  155. teradataml/data/computers_train1.csv +5009 -5009
  156. teradataml/data/computers_train1_clustered.csv +5009 -5009
  157. teradataml/data/confusionmatrix_example.json +9 -9
  158. teradataml/data/conversion_event_table.csv +3 -3
  159. teradataml/data/corr_input.csv +17 -17
  160. teradataml/data/correlation_example.json +11 -11
  161. teradataml/data/coxhazardratio_example.json +39 -39
  162. teradataml/data/coxph_example.json +15 -15
  163. teradataml/data/coxsurvival_example.json +28 -28
  164. teradataml/data/cpt.csv +41 -41
  165. teradataml/data/credit_ex_merged.csv +45 -45
  166. teradataml/data/customer_loyalty.csv +301 -301
  167. teradataml/data/customer_loyalty_newseq.csv +31 -31
  168. teradataml/data/dataframe_example.json +146 -146
  169. teradataml/data/decisionforest_example.json +37 -37
  170. teradataml/data/decisionforestpredict_example.json +38 -38
  171. teradataml/data/decisiontree_example.json +21 -21
  172. teradataml/data/decisiontreepredict_example.json +45 -45
  173. teradataml/data/dfft2_size4_real.csv +17 -17
  174. teradataml/data/dfft2_test_matrix16.csv +17 -17
  175. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  176. teradataml/data/diabetes.csv +443 -443
  177. teradataml/data/diabetes_test.csv +89 -89
  178. teradataml/data/dict_table.csv +5 -5
  179. teradataml/data/docperterm_table.csv +4 -4
  180. teradataml/data/docs/__init__.py +1 -1
  181. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  182. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  183. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  184. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  185. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  186. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  187. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  188. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  189. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  190. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  191. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  192. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  193. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  194. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  195. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  196. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  197. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  198. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  199. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  200. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  201. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  202. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  203. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  204. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  205. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  206. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  207. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  208. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  209. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +132 -132
  210. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +103 -103
  211. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  212. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +101 -101
  213. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  214. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  215. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  216. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  217. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  218. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  219. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  220. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  221. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  222. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  223. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  224. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  225. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  226. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  227. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  228. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  229. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  230. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  231. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  232. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  233. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  234. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +126 -126
  235. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  236. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  237. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  238. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  239. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  240. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  241. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  242. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  243. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +243 -243
  244. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  245. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  246. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  247. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  248. teradataml/data/docs/sqle/docs_17_20/FTest.py +160 -160
  249. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  250. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  251. teradataml/data/docs/sqle/docs_17_20/GLM.py +380 -380
  252. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  253. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  254. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  255. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +123 -123
  256. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  257. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  258. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  259. teradataml/data/docs/sqle/docs_17_20/KMeans.py +204 -204
  260. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  261. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  262. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  263. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  264. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  265. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  266. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  267. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  268. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +117 -117
  269. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  270. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  271. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  272. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  273. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +225 -225
  274. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +115 -115
  275. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  276. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  277. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  278. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  279. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  280. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  281. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  282. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  283. teradataml/data/docs/sqle/docs_17_20/ROC.py +163 -163
  284. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  285. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  286. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  287. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  288. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  289. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  290. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  291. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  292. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +202 -202
  293. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  294. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +197 -197
  295. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +110 -109
  296. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  297. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  298. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  299. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  300. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  301. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  302. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  303. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  304. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +171 -171
  305. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  306. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  307. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  308. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  309. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  310. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  311. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  312. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  313. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  314. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  315. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  316. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +353 -353
  317. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +275 -275
  318. teradataml/data/docs/sqle/docs_17_20/ZTest.py +155 -155
  319. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  320. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  321. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  322. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  323. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  324. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  325. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  326. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  327. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  328. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  329. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  330. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  331. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  332. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  333. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  334. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  335. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  336. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  337. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  338. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  339. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  340. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  341. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  342. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  343. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  344. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  345. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  346. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  347. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  348. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  349. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  350. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  351. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  352. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  353. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  354. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  355. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  356. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  357. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  358. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  359. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  360. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  361. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  362. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  363. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  364. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  365. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  366. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  367. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  368. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  369. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  370. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  371. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  372. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  373. teradataml/data/dtw_example.json +17 -17
  374. teradataml/data/dtw_t1.csv +11 -11
  375. teradataml/data/dtw_t2.csv +4 -4
  376. teradataml/data/dwt2d_example.json +15 -15
  377. teradataml/data/dwt_example.json +14 -14
  378. teradataml/data/dwt_filter_dim.csv +5 -5
  379. teradataml/data/emission.csv +9 -9
  380. teradataml/data/emp_table_by_dept.csv +19 -19
  381. teradataml/data/employee_info.csv +4 -4
  382. teradataml/data/employee_table.csv +6 -6
  383. teradataml/data/excluding_event_table.csv +2 -2
  384. teradataml/data/finance_data.csv +6 -6
  385. teradataml/data/finance_data2.csv +61 -61
  386. teradataml/data/finance_data3.csv +93 -93
  387. teradataml/data/fish.csv +160 -0
  388. teradataml/data/fm_blood2ageandweight.csv +26 -26
  389. teradataml/data/fmeasure_example.json +11 -11
  390. teradataml/data/followers_leaders.csv +10 -10
  391. teradataml/data/fpgrowth_example.json +12 -12
  392. teradataml/data/frequentpaths_example.json +29 -29
  393. teradataml/data/friends.csv +9 -9
  394. teradataml/data/fs_input.csv +33 -33
  395. teradataml/data/fs_input1.csv +33 -33
  396. teradataml/data/genData.csv +513 -513
  397. teradataml/data/geodataframe_example.json +39 -39
  398. teradataml/data/glass_types.csv +215 -0
  399. teradataml/data/glm_admissions_model.csv +12 -12
  400. teradataml/data/glm_example.json +29 -29
  401. teradataml/data/glml1l2_example.json +28 -28
  402. teradataml/data/glml1l2predict_example.json +54 -54
  403. teradataml/data/glmpredict_example.json +54 -54
  404. teradataml/data/gq_t1.csv +21 -21
  405. teradataml/data/hconvolve_complex_right.csv +5 -5
  406. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  407. teradataml/data/histogram_example.json +11 -11
  408. teradataml/data/hmmdecoder_example.json +78 -78
  409. teradataml/data/hmmevaluator_example.json +24 -24
  410. teradataml/data/hmmsupervised_example.json +10 -10
  411. teradataml/data/hmmunsupervised_example.json +7 -7
  412. teradataml/data/house_values.csv +12 -12
  413. teradataml/data/house_values2.csv +13 -13
  414. teradataml/data/housing_cat.csv +7 -7
  415. teradataml/data/housing_data.csv +9 -9
  416. teradataml/data/housing_test.csv +47 -47
  417. teradataml/data/housing_test_binary.csv +47 -47
  418. teradataml/data/housing_train.csv +493 -493
  419. teradataml/data/housing_train_attribute.csv +4 -4
  420. teradataml/data/housing_train_binary.csv +437 -437
  421. teradataml/data/housing_train_parameter.csv +2 -2
  422. teradataml/data/housing_train_response.csv +493 -493
  423. teradataml/data/ibm_stock.csv +370 -370
  424. teradataml/data/ibm_stock1.csv +370 -370
  425. teradataml/data/identitymatch_example.json +21 -21
  426. teradataml/data/idf_table.csv +4 -4
  427. teradataml/data/impressions.csv +101 -101
  428. teradataml/data/inflation.csv +21 -21
  429. teradataml/data/initial.csv +3 -3
  430. teradataml/data/insect_sprays.csv +12 -12
  431. teradataml/data/insurance.csv +1339 -1339
  432. teradataml/data/interpolator_example.json +12 -12
  433. teradataml/data/iris_altinput.csv +481 -481
  434. teradataml/data/iris_attribute_output.csv +8 -8
  435. teradataml/data/iris_attribute_test.csv +121 -121
  436. teradataml/data/iris_attribute_train.csv +481 -481
  437. teradataml/data/iris_category_expect_predict.csv +31 -31
  438. teradataml/data/iris_data.csv +151 -0
  439. teradataml/data/iris_input.csv +151 -151
  440. teradataml/data/iris_response_train.csv +121 -121
  441. teradataml/data/iris_test.csv +31 -31
  442. teradataml/data/iris_train.csv +121 -121
  443. teradataml/data/join_table1.csv +4 -4
  444. teradataml/data/join_table2.csv +4 -4
  445. teradataml/data/jsons/anly_function_name.json +6 -6
  446. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  447. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  448. teradataml/data/jsons/byom/h2opredict.json +194 -194
  449. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  450. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  451. teradataml/data/jsons/paired_functions.json +435 -435
  452. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  453. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  454. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  455. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  456. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  457. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  458. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  459. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  460. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  461. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  462. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  463. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  464. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  465. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  466. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  467. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  468. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  469. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  470. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  471. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  472. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  473. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  474. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  475. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  476. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  477. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  478. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  479. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  480. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  481. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  482. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  483. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  484. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  485. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  486. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  487. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  488. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  489. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  490. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  491. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  492. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  493. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  494. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  495. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  496. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  497. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  498. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  499. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  500. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  501. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  502. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  503. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  504. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  505. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  506. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  507. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  508. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  509. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  510. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  511. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  512. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  513. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  514. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  515. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  516. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  517. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  518. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  519. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  520. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  521. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  522. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  523. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  524. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  525. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  526. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  527. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  528. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  529. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  531. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  532. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  533. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  534. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  535. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  536. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  537. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  539. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  540. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  541. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  542. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  543. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  544. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  545. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  546. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  547. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  548. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  549. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  550. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  551. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  552. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  553. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  554. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  555. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  556. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  557. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  558. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +76 -76
  559. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  560. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  561. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  562. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  563. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  564. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  565. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  566. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  567. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  568. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  569. teradataml/data/jsons/sqle/17.20/TD_FTest.json +186 -186
  570. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  571. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  572. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  573. teradataml/data/jsons/sqle/17.20/TD_GLM.json +431 -431
  574. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +125 -125
  575. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  576. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  577. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +91 -91
  578. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  579. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  580. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  581. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +211 -211
  582. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  583. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  584. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  585. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +101 -101
  586. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  587. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  588. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  589. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  590. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  591. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  592. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  593. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  594. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  595. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  596. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  597. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  598. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  599. teradataml/data/jsons/sqle/17.20/TD_ROC.json +177 -177
  600. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  601. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  602. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  603. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  604. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  605. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  606. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  607. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  608. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +124 -124
  609. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +156 -156
  610. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +70 -70
  611. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  612. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  613. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  614. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  615. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  616. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  617. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  618. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  619. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  620. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  621. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  622. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  623. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  624. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  625. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +312 -312
  626. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +182 -182
  627. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +170 -170
  628. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  629. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  630. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  631. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  632. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  633. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  634. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  635. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  636. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  637. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  638. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  639. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  640. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  641. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  642. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  643. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  644. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  645. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  646. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  647. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  648. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  649. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  650. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  651. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  653. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  654. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  655. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  656. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  657. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  658. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  659. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  660. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  661. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  662. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  663. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  664. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  665. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  666. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  667. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  668. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  669. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  670. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  671. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  672. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  673. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  674. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  675. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  676. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  677. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  678. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  679. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  680. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  681. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  682. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  683. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  684. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  685. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  686. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  687. teradataml/data/kmeans_example.json +17 -17
  688. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  689. teradataml/data/knn_example.json +18 -18
  690. teradataml/data/knnrecommender_example.json +6 -6
  691. teradataml/data/knnrecommenderpredict_example.json +12 -12
  692. teradataml/data/lar_example.json +17 -17
  693. teradataml/data/larpredict_example.json +30 -30
  694. teradataml/data/lc_new_predictors.csv +5 -5
  695. teradataml/data/lc_new_reference.csv +9 -9
  696. teradataml/data/lda_example.json +8 -8
  697. teradataml/data/ldainference_example.json +14 -14
  698. teradataml/data/ldatopicsummary_example.json +8 -8
  699. teradataml/data/levendist_input.csv +13 -13
  700. teradataml/data/levenshteindistance_example.json +10 -10
  701. teradataml/data/linreg_example.json +9 -9
  702. teradataml/data/load_example_data.py +326 -323
  703. teradataml/data/loan_prediction.csv +295 -295
  704. teradataml/data/lungcancer.csv +138 -138
  705. teradataml/data/mappingdata.csv +12 -12
  706. teradataml/data/milk_timeseries.csv +157 -157
  707. teradataml/data/min_max_titanic.csv +4 -4
  708. teradataml/data/minhash_example.json +6 -6
  709. teradataml/data/ml_ratings.csv +7547 -7547
  710. teradataml/data/ml_ratings_10.csv +2445 -2445
  711. teradataml/data/model1_table.csv +5 -5
  712. teradataml/data/model2_table.csv +5 -5
  713. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  714. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  715. teradataml/data/modularity_example.json +12 -12
  716. teradataml/data/movavg_example.json +7 -7
  717. teradataml/data/mtx1.csv +7 -7
  718. teradataml/data/mtx2.csv +13 -13
  719. teradataml/data/multi_model_classification.csv +401 -0
  720. teradataml/data/multi_model_regression.csv +401 -0
  721. teradataml/data/mvdfft8.csv +9 -9
  722. teradataml/data/naivebayes_example.json +9 -9
  723. teradataml/data/naivebayespredict_example.json +19 -19
  724. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  725. teradataml/data/naivebayestextclassifier_example.json +8 -8
  726. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  727. teradataml/data/name_Find_configure.csv +10 -10
  728. teradataml/data/namedentityfinder_example.json +14 -14
  729. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  730. teradataml/data/namedentityfindertrainer_example.json +6 -6
  731. teradataml/data/nb_iris_input_test.csv +31 -31
  732. teradataml/data/nb_iris_input_train.csv +121 -121
  733. teradataml/data/nbp_iris_model.csv +13 -13
  734. teradataml/data/ner_extractor_text.csv +2 -2
  735. teradataml/data/ner_sports_test2.csv +29 -29
  736. teradataml/data/ner_sports_train.csv +501 -501
  737. teradataml/data/nerevaluator_example.json +5 -5
  738. teradataml/data/nerextractor_example.json +18 -18
  739. teradataml/data/nermem_sports_test.csv +17 -17
  740. teradataml/data/nermem_sports_train.csv +50 -50
  741. teradataml/data/nertrainer_example.json +6 -6
  742. teradataml/data/ngrams_example.json +6 -6
  743. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  744. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  745. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  746. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  747. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  748. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  749. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  750. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  751. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  752. teradataml/data/npath_example.json +23 -23
  753. teradataml/data/ntree_example.json +14 -14
  754. teradataml/data/numeric_strings.csv +4 -4
  755. teradataml/data/numerics.csv +4 -4
  756. teradataml/data/ocean_buoy.csv +17 -17
  757. teradataml/data/ocean_buoy2.csv +17 -17
  758. teradataml/data/ocean_buoys.csv +27 -27
  759. teradataml/data/ocean_buoys2.csv +10 -10
  760. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  761. teradataml/data/ocean_buoys_seq.csv +29 -29
  762. teradataml/data/openml_example.json +63 -0
  763. teradataml/data/optional_event_table.csv +4 -4
  764. teradataml/data/orders1.csv +11 -11
  765. teradataml/data/orders1_12.csv +12 -12
  766. teradataml/data/orders_ex.csv +4 -4
  767. teradataml/data/pack_example.json +8 -8
  768. teradataml/data/package_tracking.csv +19 -19
  769. teradataml/data/package_tracking_pti.csv +18 -18
  770. teradataml/data/pagerank_example.json +13 -13
  771. teradataml/data/paragraphs_input.csv +6 -6
  772. teradataml/data/pathanalyzer_example.json +7 -7
  773. teradataml/data/pathgenerator_example.json +7 -7
  774. teradataml/data/phrases.csv +7 -7
  775. teradataml/data/pivot_example.json +8 -8
  776. teradataml/data/pivot_input.csv +22 -22
  777. teradataml/data/playerRating.csv +31 -31
  778. teradataml/data/postagger_example.json +6 -6
  779. teradataml/data/posttagger_output.csv +44 -44
  780. teradataml/data/production_data.csv +16 -16
  781. teradataml/data/production_data2.csv +7 -7
  782. teradataml/data/randomsample_example.json +31 -31
  783. teradataml/data/randomwalksample_example.json +8 -8
  784. teradataml/data/rank_table.csv +6 -6
  785. teradataml/data/ref_mobile_data.csv +4 -4
  786. teradataml/data/ref_mobile_data_dense.csv +2 -2
  787. teradataml/data/ref_url.csv +17 -17
  788. teradataml/data/restaurant_reviews.csv +7 -7
  789. teradataml/data/river_data.csv +145 -145
  790. teradataml/data/roc_example.json +7 -7
  791. teradataml/data/roc_input.csv +101 -101
  792. teradataml/data/rule_inputs.csv +6 -6
  793. teradataml/data/rule_table.csv +2 -2
  794. teradataml/data/sales.csv +7 -7
  795. teradataml/data/sales_transaction.csv +501 -501
  796. teradataml/data/salesdata.csv +342 -342
  797. teradataml/data/sample_cities.csv +2 -2
  798. teradataml/data/sample_shapes.csv +10 -10
  799. teradataml/data/sample_streets.csv +2 -2
  800. teradataml/data/sampling_example.json +15 -15
  801. teradataml/data/sax_example.json +8 -8
  802. teradataml/data/scale_example.json +23 -23
  803. teradataml/data/scale_housing.csv +11 -11
  804. teradataml/data/scale_housing_test.csv +6 -6
  805. teradataml/data/scale_stat.csv +11 -11
  806. teradataml/data/scalebypartition_example.json +13 -13
  807. teradataml/data/scalemap_example.json +13 -13
  808. teradataml/data/scalesummary_example.json +12 -12
  809. teradataml/data/score_category.csv +101 -101
  810. teradataml/data/score_summary.csv +4 -4
  811. teradataml/data/script_example.json +9 -9
  812. teradataml/data/scripts/deploy_script.py +65 -0
  813. teradataml/data/scripts/mapper.R +20 -0
  814. teradataml/data/scripts/mapper.py +15 -15
  815. teradataml/data/scripts/mapper_replace.py +15 -15
  816. teradataml/data/scripts/sklearn/__init__.py +0 -0
  817. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  818. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  819. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  820. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  821. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  822. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  823. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  824. teradataml/data/seeds.csv +10 -10
  825. teradataml/data/sentenceextractor_example.json +6 -6
  826. teradataml/data/sentiment_extract_input.csv +11 -11
  827. teradataml/data/sentiment_train.csv +16 -16
  828. teradataml/data/sentiment_word.csv +20 -20
  829. teradataml/data/sentiment_word_input.csv +19 -19
  830. teradataml/data/sentimentextractor_example.json +24 -24
  831. teradataml/data/sentimenttrainer_example.json +8 -8
  832. teradataml/data/sequence_table.csv +10 -10
  833. teradataml/data/seriessplitter_example.json +7 -7
  834. teradataml/data/sessionize_example.json +17 -17
  835. teradataml/data/sessionize_table.csv +116 -116
  836. teradataml/data/setop_test1.csv +24 -24
  837. teradataml/data/setop_test2.csv +22 -22
  838. teradataml/data/soc_nw_edges.csv +10 -10
  839. teradataml/data/soc_nw_vertices.csv +7 -7
  840. teradataml/data/souvenir_timeseries.csv +167 -167
  841. teradataml/data/sparse_iris_attribute.csv +5 -5
  842. teradataml/data/sparse_iris_test.csv +121 -121
  843. teradataml/data/sparse_iris_train.csv +601 -601
  844. teradataml/data/star1.csv +6 -6
  845. teradataml/data/state_transition.csv +5 -5
  846. teradataml/data/stock_data.csv +53 -53
  847. teradataml/data/stock_movement.csv +11 -11
  848. teradataml/data/stock_vol.csv +76 -76
  849. teradataml/data/stop_words.csv +8 -8
  850. teradataml/data/store_sales.csv +37 -37
  851. teradataml/data/stringsimilarity_example.json +7 -7
  852. teradataml/data/strsimilarity_input.csv +13 -13
  853. teradataml/data/students.csv +101 -101
  854. teradataml/data/svm_iris_input_test.csv +121 -121
  855. teradataml/data/svm_iris_input_train.csv +481 -481
  856. teradataml/data/svm_iris_model.csv +7 -7
  857. teradataml/data/svmdense_example.json +9 -9
  858. teradataml/data/svmdensepredict_example.json +18 -18
  859. teradataml/data/svmsparse_example.json +7 -7
  860. teradataml/data/svmsparsepredict_example.json +13 -13
  861. teradataml/data/svmsparsesummary_example.json +7 -7
  862. teradataml/data/target_mobile_data.csv +13 -13
  863. teradataml/data/target_mobile_data_dense.csv +5 -5
  864. teradataml/data/templatedata.csv +1201 -1201
  865. teradataml/data/templates/open_source_ml.json +9 -0
  866. teradataml/data/teradataml_example.json +73 -1
  867. teradataml/data/test_classification.csv +101 -0
  868. teradataml/data/test_loan_prediction.csv +53 -53
  869. teradataml/data/test_pacf_12.csv +37 -37
  870. teradataml/data/test_prediction.csv +101 -0
  871. teradataml/data/test_regression.csv +101 -0
  872. teradataml/data/test_river2.csv +109 -109
  873. teradataml/data/text_inputs.csv +6 -6
  874. teradataml/data/textchunker_example.json +7 -7
  875. teradataml/data/textclassifier_example.json +6 -6
  876. teradataml/data/textclassifier_input.csv +7 -7
  877. teradataml/data/textclassifiertrainer_example.json +6 -6
  878. teradataml/data/textmorph_example.json +5 -5
  879. teradataml/data/textparser_example.json +15 -15
  880. teradataml/data/texttagger_example.json +11 -11
  881. teradataml/data/texttokenizer_example.json +6 -6
  882. teradataml/data/texttrainer_input.csv +11 -11
  883. teradataml/data/tf_example.json +6 -6
  884. teradataml/data/tfidf_example.json +13 -13
  885. teradataml/data/tfidf_input1.csv +201 -201
  886. teradataml/data/tfidf_train.csv +6 -6
  887. teradataml/data/time_table1.csv +535 -535
  888. teradataml/data/time_table2.csv +14 -14
  889. teradataml/data/timeseriesdata.csv +1601 -1601
  890. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  891. teradataml/data/titanic.csv +892 -892
  892. teradataml/data/token_table.csv +696 -696
  893. teradataml/data/train_multiclass.csv +101 -0
  894. teradataml/data/train_regression.csv +101 -0
  895. teradataml/data/train_regression_multiple_labels.csv +101 -0
  896. teradataml/data/train_tracking.csv +27 -27
  897. teradataml/data/transformation_table.csv +5 -5
  898. teradataml/data/transformation_table_new.csv +1 -1
  899. teradataml/data/tv_spots.csv +16 -16
  900. teradataml/data/twod_climate_data.csv +117 -117
  901. teradataml/data/uaf_example.json +475 -475
  902. teradataml/data/univariatestatistics_example.json +8 -8
  903. teradataml/data/unpack_example.json +9 -9
  904. teradataml/data/unpivot_example.json +9 -9
  905. teradataml/data/unpivot_input.csv +8 -8
  906. teradataml/data/us_air_pass.csv +36 -36
  907. teradataml/data/us_population.csv +624 -624
  908. teradataml/data/us_states_shapes.csv +52 -52
  909. teradataml/data/varmax_example.json +17 -17
  910. teradataml/data/vectordistance_example.json +25 -25
  911. teradataml/data/ville_climatedata.csv +121 -121
  912. teradataml/data/ville_tempdata.csv +12 -12
  913. teradataml/data/ville_tempdata1.csv +12 -12
  914. teradataml/data/ville_temperature.csv +11 -11
  915. teradataml/data/waveletTable.csv +1605 -1605
  916. teradataml/data/waveletTable2.csv +1605 -1605
  917. teradataml/data/weightedmovavg_example.json +8 -8
  918. teradataml/data/wft_testing.csv +5 -5
  919. teradataml/data/wine_data.csv +1600 -0
  920. teradataml/data/word_embed_input_table1.csv +5 -5
  921. teradataml/data/word_embed_input_table2.csv +4 -4
  922. teradataml/data/word_embed_model.csv +22 -22
  923. teradataml/data/words_input.csv +13 -13
  924. teradataml/data/xconvolve_complex_left.csv +6 -6
  925. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  926. teradataml/data/xgboost_example.json +35 -35
  927. teradataml/data/xgboostpredict_example.json +31 -31
  928. teradataml/dataframe/copy_to.py +1764 -1698
  929. teradataml/dataframe/data_transfer.py +2753 -2745
  930. teradataml/dataframe/dataframe.py +17545 -16946
  931. teradataml/dataframe/dataframe_utils.py +1837 -1740
  932. teradataml/dataframe/fastload.py +611 -603
  933. teradataml/dataframe/indexer.py +424 -424
  934. teradataml/dataframe/setop.py +1179 -1166
  935. teradataml/dataframe/sql.py +10090 -6432
  936. teradataml/dataframe/sql_function_parameters.py +439 -388
  937. teradataml/dataframe/sql_functions.py +652 -652
  938. teradataml/dataframe/sql_interfaces.py +220 -220
  939. teradataml/dataframe/vantage_function_types.py +674 -630
  940. teradataml/dataframe/window.py +693 -692
  941. teradataml/dbutils/__init__.py +3 -3
  942. teradataml/dbutils/dbutils.py +1167 -1150
  943. teradataml/dbutils/filemgr.py +267 -267
  944. teradataml/gen_ai/__init__.py +2 -2
  945. teradataml/gen_ai/convAI.py +472 -472
  946. teradataml/geospatial/__init__.py +3 -3
  947. teradataml/geospatial/geodataframe.py +1105 -1094
  948. teradataml/geospatial/geodataframecolumn.py +392 -387
  949. teradataml/geospatial/geometry_types.py +925 -925
  950. teradataml/hyperparameter_tuner/__init__.py +1 -1
  951. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  952. teradataml/hyperparameter_tuner/utils.py +281 -187
  953. teradataml/lib/aed_0_1.dll +0 -0
  954. teradataml/lib/libaed_0_1.dylib +0 -0
  955. teradataml/lib/libaed_0_1.so +0 -0
  956. teradataml/libaed_0_1.dylib +0 -0
  957. teradataml/libaed_0_1.so +0 -0
  958. teradataml/opensource/__init__.py +1 -0
  959. teradataml/opensource/sklearn/__init__.py +1 -0
  960. teradataml/opensource/sklearn/_class.py +255 -0
  961. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  962. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  963. teradataml/opensource/sklearn/constants.py +54 -0
  964. teradataml/options/__init__.py +121 -124
  965. teradataml/options/configure.py +337 -336
  966. teradataml/options/display.py +176 -176
  967. teradataml/plot/__init__.py +2 -2
  968. teradataml/plot/axis.py +1388 -1388
  969. teradataml/plot/constants.py +15 -15
  970. teradataml/plot/figure.py +398 -398
  971. teradataml/plot/plot.py +760 -760
  972. teradataml/plot/query_generator.py +83 -83
  973. teradataml/plot/subplot.py +216 -216
  974. teradataml/scriptmgmt/UserEnv.py +3788 -3761
  975. teradataml/scriptmgmt/__init__.py +3 -3
  976. teradataml/scriptmgmt/lls_utils.py +1616 -1604
  977. teradataml/series/series.py +532 -532
  978. teradataml/series/series_utils.py +71 -71
  979. teradataml/table_operators/Apply.py +949 -917
  980. teradataml/table_operators/Script.py +1719 -1982
  981. teradataml/table_operators/TableOperator.py +1207 -1616
  982. teradataml/table_operators/__init__.py +2 -3
  983. teradataml/table_operators/apply_query_generator.py +262 -262
  984. teradataml/table_operators/query_generator.py +507 -507
  985. teradataml/table_operators/table_operator_query_generator.py +460 -460
  986. teradataml/table_operators/table_operator_util.py +631 -639
  987. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  988. teradataml/table_operators/templates/dataframe_map.template +176 -176
  989. teradataml/table_operators/templates/script_executor.template +170 -170
  990. teradataml/utils/dtypes.py +684 -684
  991. teradataml/utils/internal_buffer.py +84 -84
  992. teradataml/utils/print_versions.py +205 -205
  993. teradataml/utils/utils.py +410 -410
  994. teradataml/utils/validators.py +2239 -2115
  995. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +270 -41
  996. teradataml-20.0.0.0.dist-info/RECORD +1038 -0
  997. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +1 -1
  998. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +1 -1
  999. teradataml/analytics/mle/AdaBoost.py +0 -651
  1000. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1001. teradataml/analytics/mle/Antiselect.py +0 -342
  1002. teradataml/analytics/mle/Arima.py +0 -641
  1003. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1004. teradataml/analytics/mle/Attribution.py +0 -1070
  1005. teradataml/analytics/mle/Betweenness.py +0 -658
  1006. teradataml/analytics/mle/Burst.py +0 -711
  1007. teradataml/analytics/mle/CCM.py +0 -600
  1008. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1009. teradataml/analytics/mle/CFilter.py +0 -460
  1010. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1011. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1012. teradataml/analytics/mle/Closeness.py +0 -737
  1013. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1014. teradataml/analytics/mle/Correlation.py +0 -477
  1015. teradataml/analytics/mle/Correlation2.py +0 -573
  1016. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1017. teradataml/analytics/mle/CoxPH.py +0 -556
  1018. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1019. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1020. teradataml/analytics/mle/DTW.py +0 -623
  1021. teradataml/analytics/mle/DWT.py +0 -564
  1022. teradataml/analytics/mle/DWT2D.py +0 -599
  1023. teradataml/analytics/mle/DecisionForest.py +0 -716
  1024. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1025. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1026. teradataml/analytics/mle/DecisionTree.py +0 -830
  1027. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1028. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1029. teradataml/analytics/mle/FMeasure.py +0 -402
  1030. teradataml/analytics/mle/FPGrowth.py +0 -734
  1031. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1032. teradataml/analytics/mle/GLM.py +0 -558
  1033. teradataml/analytics/mle/GLML1L2.py +0 -547
  1034. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1035. teradataml/analytics/mle/GLMPredict.py +0 -529
  1036. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1037. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1038. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1039. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1040. teradataml/analytics/mle/Histogram.py +0 -561
  1041. teradataml/analytics/mle/IDWT.py +0 -476
  1042. teradataml/analytics/mle/IDWT2D.py +0 -493
  1043. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1044. teradataml/analytics/mle/Interpolator.py +0 -918
  1045. teradataml/analytics/mle/KMeans.py +0 -485
  1046. teradataml/analytics/mle/KNN.py +0 -627
  1047. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1048. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1049. teradataml/analytics/mle/LAR.py +0 -439
  1050. teradataml/analytics/mle/LARPredict.py +0 -478
  1051. teradataml/analytics/mle/LDA.py +0 -548
  1052. teradataml/analytics/mle/LDAInference.py +0 -492
  1053. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1054. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1055. teradataml/analytics/mle/LinReg.py +0 -433
  1056. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1057. teradataml/analytics/mle/MinHash.py +0 -544
  1058. teradataml/analytics/mle/Modularity.py +0 -587
  1059. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1060. teradataml/analytics/mle/NERExtractor.py +0 -595
  1061. teradataml/analytics/mle/NERTrainer.py +0 -458
  1062. teradataml/analytics/mle/NGrams.py +0 -570
  1063. teradataml/analytics/mle/NPath.py +0 -634
  1064. teradataml/analytics/mle/NTree.py +0 -549
  1065. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1066. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1067. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1068. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1069. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1070. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1071. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1072. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1073. teradataml/analytics/mle/POSTagger.py +0 -417
  1074. teradataml/analytics/mle/Pack.py +0 -411
  1075. teradataml/analytics/mle/PageRank.py +0 -535
  1076. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1077. teradataml/analytics/mle/PathGenerator.py +0 -367
  1078. teradataml/analytics/mle/PathStart.py +0 -464
  1079. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1080. teradataml/analytics/mle/Pivot.py +0 -471
  1081. teradataml/analytics/mle/ROC.py +0 -425
  1082. teradataml/analytics/mle/RandomSample.py +0 -637
  1083. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1084. teradataml/analytics/mle/SAX.py +0 -779
  1085. teradataml/analytics/mle/SVMDense.py +0 -677
  1086. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1087. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1088. teradataml/analytics/mle/SVMSparse.py +0 -557
  1089. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1090. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1091. teradataml/analytics/mle/Sampling.py +0 -549
  1092. teradataml/analytics/mle/Scale.py +0 -565
  1093. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1094. teradataml/analytics/mle/ScaleMap.py +0 -378
  1095. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1096. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1097. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1098. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1099. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1100. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1101. teradataml/analytics/mle/Sessionize.py +0 -475
  1102. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1103. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1104. teradataml/analytics/mle/TF.py +0 -389
  1105. teradataml/analytics/mle/TFIDF.py +0 -504
  1106. teradataml/analytics/mle/TextChunker.py +0 -414
  1107. teradataml/analytics/mle/TextClassifier.py +0 -399
  1108. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1109. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1110. teradataml/analytics/mle/TextMorph.py +0 -494
  1111. teradataml/analytics/mle/TextParser.py +0 -623
  1112. teradataml/analytics/mle/TextTagger.py +0 -530
  1113. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1114. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1115. teradataml/analytics/mle/Unpack.py +0 -526
  1116. teradataml/analytics/mle/Unpivot.py +0 -438
  1117. teradataml/analytics/mle/VarMax.py +0 -776
  1118. teradataml/analytics/mle/VectorDistance.py +0 -762
  1119. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1120. teradataml/analytics/mle/XGBoost.py +0 -842
  1121. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1122. teradataml/analytics/mle/__init__.py +0 -123
  1123. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1124. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1125. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1126. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1127. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1128. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1129. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1130. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1131. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1132. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1133. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1134. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1135. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1136. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1137. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1138. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1139. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1140. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1141. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1142. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1143. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1144. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1145. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1146. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1147. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1148. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1149. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1150. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1151. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1152. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1153. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1154. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1155. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1156. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1157. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1158. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1159. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1160. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1161. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1162. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1163. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1164. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1165. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1166. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1167. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1168. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1169. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1170. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1171. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1172. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1173. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1174. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1175. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1176. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1177. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1178. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1179. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1180. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1181. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1182. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1183. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1184. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1185. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1186. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1187. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1188. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1189. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1190. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1191. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1192. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1193. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1194. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1195. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1196. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1197. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1198. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1199. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1200. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1201. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1202. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1203. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1204. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1205. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1206. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1207. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1208. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1209. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1210. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1211. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1212. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1213. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1214. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1215. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1216. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1217. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1218. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1219. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1220. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1221. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1222. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1223. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1224. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1225. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1226. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1227. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1228. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1229. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1230. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1231. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1232. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1233. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1234. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1235. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1236. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1237. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1238. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1239. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1240. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1241. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1242. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1243. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1244. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1245. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1246. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1247. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1248. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1249. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1250. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1251. teradataml/analytics/sqle/Antiselect.py +0 -321
  1252. teradataml/analytics/sqle/Attribution.py +0 -603
  1253. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1254. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1255. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1256. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1257. teradataml/analytics/sqle/NPath.py +0 -632
  1258. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1259. teradataml/analytics/sqle/Pack.py +0 -388
  1260. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1261. teradataml/analytics/sqle/Sessionize.py +0 -390
  1262. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1263. teradataml/analytics/sqle/Unpack.py +0 -503
  1264. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1265. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1266. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1267. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1268. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1269. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1270. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1271. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1272. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1273. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1274. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1275. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1276. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1277. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1278. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1279. teradataml/catalog/model_cataloging.py +0 -980
  1280. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1281. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1282. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1283. teradataml/table_operators/sandbox_container_util.py +0 -643
  1284. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1285. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1648 @@
+ # ##################################################################
+ #
+ # Copyright 2024 Teradata. All rights reserved.
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
+ #
+ # Primary Owner: Sweta Shaw
+ # Email Id: Sweta.Shaw@Teradata.com
+ #
+ # Secondary Owner: Akhil Bisht
+ # Email Id: AKHIL.BISHT@Teradata.com
+ #
+ # Version: 1.1
+ # Function Version: 1.0
+ # ##################################################################
+
+ # Python libraries
+ import pandas as pd
+ import time
+ import json
+ import re
+
+ # Teradata libraries
+ from teradataml.dataframe.dataframe import DataFrame
+ from teradataml.dataframe.copy_to import copy_to_sql
+ from teradataml import Antiselect
+ from teradataml import BincodeFit, BincodeTransform
+ from teradataml import ColumnSummary, CategoricalSummary, GetFutileColumns, FillRowId
+ from teradataml import Fit, Transform
+ from teradataml import NonLinearCombineFit, NonLinearCombineTransform
+ from teradataml import NumApply
+ from teradataml import OneHotEncodingFit, OneHotEncodingTransform
+ from teradataml import OrdinalEncodingFit, OrdinalEncodingTransform
+ from teradataml import SimpleImputeFit, SimpleImputeTransform
+ from teradataml import StrApply
+ from teradataml import TargetEncodingFit, TargetEncodingTransform
+ from sqlalchemy import literal_column
+ from teradatasqlalchemy import INTEGER
+ from teradataml import display
+ from teradataml.hyperparameter_tuner.utils import _ProgressBar
+ from teradataml.utils.validators import _Validators
+
+
+ class _FeatureEngineering:
+
+     def __init__(self,
+                  data,
+                  target_column,
+                  model_list,
+                  verbose=0,
+                  task_type="Regression",
+                  custom_data=None):
+         """
+         DESCRIPTION:
+             Function initializes the data, target column and column datatypes
+             for feature engineering.
+
+         PARAMETERS:
+             data:
+                 Required Argument.
+                 Specifies the input teradataml DataFrame for feature engineering.
+                 Types: teradataml DataFrame
+
+             target_column:
+                 Required Argument.
+                 Specifies the name of the target column in "data".
+                 Types: str
+
+             model_list:
+                 Required Argument.
+                 Specifies the list of models to be used for model training.
+                 Types: list
+
+             verbose:
+                 Optional Argument.
+                 Specifies the level of detail printed during execution.
+                 Default Value: 0
+                 Permitted Values:
+                     * 0: prints the progress bar and leaderboard.
+                     * 1: prints the execution steps of AutoML.
+                     * 2: prints the intermediate data between the execution of each step of AutoML.
+                 Types: int
+
+             task_type:
+                 Required Argument.
+                 Specifies the task type for AutoML, i.e., whether to apply regression or
+                 classification on the provided dataset.
+                 Default Value: "Regression"
+                 Permitted Values: "Regression", "Classification"
+                 Types: str
+
+             custom_data:
+                 Optional Argument.
+                 Specifies a JSON object containing user-customized input.
+                 Types: json object
+         """
+         # Instance variables
+         self.data = data
+         self.target_column = target_column
+         self.model_list = model_list
+         self.verbose = verbose
+         self.task_type = task_type
+         self.custom_data = custom_data
+         self.excluded_cols = []
+         self.data_types = {key: value for key, value in self.data._column_names_and_types}
+         self.target_label = None
+         self.data_transform_dict = {}
+         self.one_hot_obj_count = 0
+         self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
+
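# --- Illustrative sketch (editor's example, not part of the package diff) ---
# How the internal _FeatureEngineering helper could be driven directly.
# Assumes an active Vantage connection; "housing_train" and "price" are
# hypothetical table/column names. Real users go through the public AutoML API.
from teradataml.dataframe.dataframe import DataFrame

train_df = DataFrame("housing_train")                # hypothetical table
fe = _FeatureEngineering(data=train_df,
                         target_column="price",      # hypothetical target
                         model_list=["glm", "xgboost"],
                         verbose=1,
                         task_type="Regression")
assert fe.is_classification_type() is False          # lambda defined above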
+     # Method for doing feature engineering on data -> adding id, removing futile
+     # columns, imputation, encoding (one hot)
+     def feature_engineering(self,
+                             auto=True):
+         """
+         DESCRIPTION:
+             Function performs the following operations:
+                 1. Removes futile columns/features from the dataset.
+                 2. Detects the columns with missing values.
+                 3. Performs imputation on the columns with missing values.
+                 4. Detects categorical columns and performs encoding on those columns.
+
+         PARAMETERS:
+             auto:
+                 Optional Argument.
+                 Specifies whether to run AutoML in custom mode or auto mode.
+                 When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
+                 Default Value: True
+                 Types: bool
+
+         Returns:
+             tuple containing the transformed teradataml DataFrame, the list of columns
+             not participating in outlier transformation, the target label mapping
+             (if any), and the data transformation dictionary.
+         """
+         # Assigning number of base jobs for progress bar.
+         base_jobs = 14 if auto else 18
+
+         # Updating model list based on distinct values of target column for classification type
+         if self.is_classification_type():
+             if self.data.drop_duplicate(self.target_column).size > 2:
+                 unsupported_models = ['svm', 'glm']
+                 self.model_list = [model for model in self.model_list if model not in unsupported_models]
+
+         # Updating number of jobs for progress bar based on number of models.
+         jobs = base_jobs + len(self.model_list)
+         self.progress_bar = _ProgressBar(jobs=jobs, verbose=2, prefix='Automl Running:')
+
+         self._display_heading(phase=1,
+                               progress_bar=self.progress_bar)
+         self._display_msg(msg='Feature Engineering started ...',
+                           progress_bar=self.progress_bar)
+
+         # Storing target column to data transform dictionary
+         self.data_transform_dict['data_target_column'] = self.target_column
+         # Storing target column encoding indicator to data transform dictionary
+         self.data_transform_dict['target_col_encode_ind'] = False
+         # Storing task type to data transform dictionary
+         self.data_transform_dict['classification_type'] = self.is_classification_type()
+         # Storing params for performing one hot encoding
+         self.data_transform_dict['one_hot_encoding_fit_obj'] = {}
+         self.data_transform_dict['one_hot_encoding_drop_list'] = []
+
+         if auto:
+             self._remove_duplicate_rows()
+             self.progress_bar.update()
+
+             self._remove_futile_columns()
+             self.progress_bar.update()
+
+             self._handle_date_columns()
+             self.progress_bar.update()
+
+             self._handling_missing_value()
+             self.progress_bar.update()
+
+             self._impute_missing_value()
+             self.progress_bar.update()
+
+             self._encoding_categorical_columns()
+             self.progress_bar.update()
+
+         else:
+             self._remove_duplicate_rows()
+             self.progress_bar.update()
+
+             self._remove_futile_columns()
+             self.progress_bar.update()
+
+             self._handle_date_columns()
+             self.progress_bar.update()
+
+             self._custom_handling_missing_value()
+             self.progress_bar.update()
+
+             self._bin_code_transformation()
+             self.progress_bar.update()
+
+             self._string_manipulation()
+             self.progress_bar.update()
+
+             self._custom_categorical_encoding()
+             self.progress_bar.update()
+
+             self._mathematical_transformation()
+             self.progress_bar.update()
+
+             self._non_linear_transformation()
+             self.progress_bar.update()
+
+             self._anti_select_columns()
+             self.progress_bar.update()
+
+         return self.data, self.excluded_cols, self.target_label, self.data_transform_dict
+
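# --- Illustrative sketch (editor's example, not part of the package diff) ---
# Running the pipeline and unpacking the four values returned above; "fe" is
# a _FeatureEngineering instance as sketched earlier.
data, excluded_cols, target_label, transform_dict = fe.feature_engineering(auto=True)
print(excluded_cols)          # columns kept out of outlier transformation
print(transform_dict.keys())  # fit objects and column lists recorded per step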
+     def _extract_list(self,
+                       list1,
+                       list2):
+         """
+         DESCRIPTION:
+             Function to extract elements from list1 which are not present in list2.
+
+         PARAMETERS:
+             list1:
+                 Required Argument.
+                 Specifies the first list, from which elements are extracted.
+                 Types: list
+
+             list2:
+                 Required Argument.
+                 Specifies the second list, whose elements are excluded while
+                 extracting from the first list.
+                 Types: list
+
+         RETURN:
+             Returns the extracted elements in the form of a list.
+
+         """
+         new_lst = list(set(list1) - set(list2))
+         return new_lst
+
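# --- Worked example (editor's note, not part of the package diff) ---
# _extract_list is a plain set difference, so duplicates collapse and the
# order of list1 is not preserved; callers above only use it for column sets.
cols = ["id", "age", "city", "age"]
skip = ["id"]
print(list(set(cols) - set(skip)))   # e.g. ['age', 'city'] in arbitrary order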
+     def _remove_duplicate_rows(self):
+         """
+         DESCRIPTION:
+             Function to handle duplicate rows present in the dataset.
+
+         """
+         self._display_msg(msg="\nHandling duplicate records present in dataset ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+         rows = self.data.shape[0]
+         self.data = self.data.drop_duplicate()
+         if rows != self.data.shape[0]:
+             self._display_msg(msg=f'Updated dataset sample after removing {rows-self.data.shape[0]} duplicate records:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+             self._display_msg(inline_msg=f"Remaining Rows in the data: {self.data.shape[0]}\n"\
+                                          f"Remaining Columns in the data: {self.data.shape[1]}",
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Analysis complete. No action taken.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle duplicate records: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
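# --- Illustrative sketch (editor's example, not part of the package diff) ---
# The de-duplication step in isolation, using only calls seen above; the
# table name is hypothetical and an active Vantage connection is assumed.
from teradataml.dataframe.dataframe import DataFrame

df = DataFrame("housing_train")              # hypothetical table
rows_before = df.shape[0]
df = df.drop_duplicate()                     # keeps one copy of each distinct row
print(rows_before - df.shape[0], "duplicate rows removed")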
+     def _get_distinct_count(self):
+         """
+         DESCRIPTION:
+             Function to get the distinct count for all features and store it in a
+             dictionary for further use.
+         """
+         # Count of distinct values in each column
+         counts = self.data.select(self.data.columns).count(distinct=True)
+
+         # Dict containing the distinct count of each column
+         self.counts_dict = next(counts.itertuples())._asdict()
+
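# --- Illustrative sketch (editor's example, not part of the package diff) ---
# The distinct-count query yields a one-row DataFrame whose columns are named
# "count_<col>", which is why the code above indexes counts_dict with
# f'count_{col}'. Table and column names here are hypothetical.
counts = df.select(df.columns).count(distinct=True)
counts_dict = next(counts.itertuples())._asdict()
print(counts_dict.get("count_age"))          # distinct values in column "age"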
+     def _preprocess_data(self):
+         """
+         DESCRIPTION:
+             Function replaces the existing id column, or adds a new id column, and
+             removes columns where every row holds the same single value.
+         """
+         # Get distinct values in each column
+         self._get_distinct_count()
+
+         # Columns to be removed if
+         # an id column is detected or the count of distinct values = 1
+         columns_to_be_removed = [col for col in self.data.columns if col.lower() == 'id' or self.counts_dict[f'count_{col}'] == 1]
+
+         # Removing id column, if it exists
+         if len(columns_to_be_removed) != 0:
+             self.data = self.data.drop(columns_to_be_removed, axis=1)
+             # Storing irrelevant column list in data transform dictionary
+             self.data_transform_dict['drop_irrelevent_columns'] = columns_to_be_removed
+
+         # Adding id column
+         obj = FillRowId(data=self.data, row_id_column='id')
+
+         self.data = obj.result
+
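# --- Illustrative sketch (editor's example, not part of the package diff) ---
# The row-id step on its own: FillRowId appends an integer "id" column, with
# the same arguments used above; "df" is any teradataml DataFrame.
df = FillRowId(data=df, row_id_column="id").result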
+     def _remove_futile_columns(self):
+         """
+         DESCRIPTION:
+             Function removes the futile columns from the dataset.
+         """
+         self._display_msg(msg="\nHandling less significant features from data ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         self._preprocess_data()
+
+         # Handling string type target column in classification
+         # by performing Ordinal Encoding
+         if self.data_types[self.target_column] in ['str']:
+             self._ordinal_encoding([self.target_column])
+
+         # Detecting categorical columns
+         categorical_columns = [col for col, d_type in self.data._column_names_and_types if d_type == 'str']
+
+         # Detecting and removing futile columns, if categorical columns exist
+         if len(categorical_columns) != 0:
+
+             obj = CategoricalSummary(data=self.data,
+                                      target_columns=categorical_columns)
+
+             gfc_out = GetFutileColumns(data=self.data,
+                                        object=obj,
+                                        category_summary_column="ColumnName",
+                                        threshold_value=0.7)
+
+             # Extracting futile columns
+             f_cols = [row[0] for row in gfc_out.result.itertuples()]
+
+             if len(f_cols) == 0:
+                 self._display_msg(inline_msg="All categorical columns seem to be significant.",
+                                   progress_bar=self.progress_bar)
+             else:
+
+                 self.data = self.data.drop(f_cols, axis=1)
+                 # Storing futile column list in data transform dictionary
+                 self.data_transform_dict['futile_columns'] = f_cols
+                 self._display_msg(msg='Removing Futile columns:',
+                                   col_lst=f_cols,
+                                   progress_bar=self.progress_bar)
+                 self._display_msg(msg='Sample of Data after removing Futile columns:',
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle less significant features: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
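# --- Illustrative sketch (editor's example, not part of the package diff) ---
# Futile-column detection in isolation, mirroring the call pattern above;
# "cat_cols" stands for a list of string-typed columns in "df".
summary = CategoricalSummary(data=df, target_columns=cat_cols)
futile = GetFutileColumns(data=df,
                          object=summary,
                          category_summary_column="ColumnName",
                          threshold_value=0.7)
f_cols = [row[0] for row in futile.result.itertuples()]
if f_cols:
    df = df.drop(f_cols, axis=1)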
+     def _handle_date_component(self,
+                                date_component_columns,
+                                date_component):
+
+         """
+         DESCRIPTION:
+             Function to handle newly generated date components, i.e., day, month and
+             year difference. Based on their distinct values, binning is done with a
+             predefined prefix. The binned components are used further as categorical
+             features.
+
+         PARAMETERS:
+             date_component_columns:
+                 Required Argument.
+                 Specifies the list of newly generated date component features.
+                 Types: list
+
+             date_component:
+                 Required Argument.
+                 Specifies the identifier for the different components of date features,
+                 i.e., D - Days, M - Months and Y - Year differences.
+                 Types: str
+
+         """
+         # Check for day
+         if date_component == "D":
+             prefix_value = "Day_"
+         # Check for month
+         elif date_component == "M":
+             prefix_value = "Month_"
+         # Check for year difference
+         elif date_component == "Y":
+             prefix_value = "Year_diff_"
+
+         # Deciding bins based on distinct values of date component features.
+         for col in date_component_columns:
+             data_size = self.data.drop_duplicate(col).size
+             if data_size < 4:
+                 num_bins = data_size
+             else:
+                 num_bins = 4
+             # Performing bincode for converting date component to specific labels
+             fit_params = {
+                 "data": self.data,
+                 "target_columns": col,
+                 "method_type": "Equal-Width",
+                 "nbins": num_bins,
+                 "label_prefix": prefix_value
+             }
+             bin_code_fit = BincodeFit(**fit_params)
+
+             fit_params_map = {"D": "day_component_fit_object",
+                               "M": "month_component_fit_object",
+                               "Y": "year_diff_component_fit_object"}
+
+             # Storing fit object for each date component in data transform dictionary
+             self.data_transform_dict[fit_params_map[date_component]][col] = bin_code_fit.output
+
+             accumulate_columns = self._extract_list(self.data.columns, [col])
+             transform_params = {
+                 "data": self.data,
+                 "object": bin_code_fit.output,
+                 "accumulate": accumulate_columns,
+                 "persist": True
+             }
+             self.data = BincodeTransform(**transform_params).result
+
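# --- Illustrative sketch (editor's example, not part of the package diff) ---
# Equal-width binning of one hypothetical day component, "txn_date_day_comp",
# into 4 labelled bins, mirroring the fit/transform pair above.
fit = BincodeFit(data=df,
                 target_columns="txn_date_day_comp",
                 method_type="Equal-Width",
                 nbins=4,
                 label_prefix="Day_")
others = [c for c in df.columns if c != "txn_date_day_comp"]
df = BincodeTransform(data=df,
                      object=fit.output,
                      accumulate=others,
                      persist=True).result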
+     def _fetch_date_component(self,
+                               process,
+                               regex_str,
+                               columns,
+                               date_component):
+
+         """
+         DESCRIPTION:
+             Function to fetch newly generated date component features and pass them
+             ahead for binning.
+
+         PARAMETERS:
+             process:
+                 Required Argument.
+                 Specifies the date component of the date feature which is going to be
+                 fetched and handled.
+                 Types: str
+
+             regex_str:
+                 Required Argument.
+                 Specifies the regular expression for identifying newly generated date
+                 component features.
+                 Types: str
+
+             columns:
+                 Required Argument.
+                 Specifies the list of newly generated date component features.
+                 Types: list
+
+             date_component:
+                 Required Argument.
+                 Specifies the identifier for the different components of date features,
+                 i.e., D - Days, M - Months and Y - Year differences.
+                 Types: str
+
+         """
+         date_component_columns = [col for col in columns if re.search(regex_str + "$", col)]
+         if len(date_component_columns) != 0:
+             self._handle_date_component(date_component_columns, date_component)
+             self._display_msg(msg="Useful {} features:".format(process),
+                               col_lst=date_component_columns,
+                               progress_bar=self.progress_bar)
+             self._display_msg(msg="Updated dataset sample:",
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+
+         else:
+             self._display_msg("\nNo useful feature found for {} component:".format(process),
+                               progress_bar=self.progress_bar)
+
+         return date_component_columns
+
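# --- Worked example (editor's note, not part of the package diff) ---
# The "$" appended to regex_str anchors the match at the end of the column
# name, so only true suffixes qualify.
import re

names = ["order_date_day_comp", "day_comp_flag"]
print([c for c in names if re.search("_day_comp$", c)])   # ['order_date_day_comp']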
+     def _handle_date_columns_helper(self):
+
+         """
+         DESCRIPTION:
+             Function for dropping irrelevant date features, extracting the day, month
+             and year components from relevant date features, and passing the extracted
+             components on for binning.
+         """
+
+         # Dropping missing values for all date columns
+         self._display_msg(msg="\nDropping missing values for:",
+                           col_lst=self.date_column_list,
+                           progress_bar=self.progress_bar)
+
+         self.data = self.data.dropna(subset=self.date_column_list)
+
+         # Date columns eligible for dropping from the dataset
+         drop_date_cols = []
+
+         # Checking for date columns where every value is unique
+         for col in self.date_column_list:
+             if self.data.drop_duplicate(col).size == self.data.shape[0]:
+                 drop_date_cols.append(col)
+
+         if len(drop_date_cols) != 0:
+             self.data = self.data.drop(drop_date_cols, axis=1)
+             # Storing unique date column list in data transform dictionary
+             self.data_transform_dict['drop_unique_date_columns'] = drop_date_cols
+             self._display_msg(msg='Dropping date features with all unique values:',
+                               col_lst=drop_date_cols,
+                               progress_bar=self.progress_bar)
+
+         # Updated date columns list
+         self.date_column_list = [item for item in self.date_column_list if item not in drop_date_cols]
+
+         # List for storing newly generated date component features
+         new_columns = []
+
+         # Extracting day, month and year difference from date columns
+         if len(self.date_column_list) != 0:
+
+             component_param = {}
+             for col in self.date_column_list:
+
+                 day_column = str(col) + "_day_comp"
+                 month_column = str(col) + "_month_comp"
+                 year_diff_column = str(col) + "_year_diff_comp"
+                 new_columns.extend([day_column, month_column, year_diff_column])
+                 day_query = ("EXTRACT(DAY FROM {0})".format(col))
+                 month_query = ("EXTRACT(MONTH FROM {0})".format(col))
+                 year_query = ("EXTRACT(YEAR FROM CURRENT_DATE) - EXTRACT(YEAR FROM {0})".format(col))
+                 component_param[day_column] = literal_column(day_query, INTEGER())
+                 component_param[month_column] = literal_column(month_query, INTEGER())
+                 component_param[year_diff_column] = literal_column(year_query, INTEGER())
+
+             self.data = self.data.assign(**component_param)
+             # Storing newly generated date component list along with parameters in data transform dictionary
+             self.data_transform_dict['extract_date_comp_col'] = self.date_column_list
+             self.data_transform_dict['extract_date_comp_param'] = component_param
+
+             # Dropping date columns, as day, month and year have already been extracted into new columns
+             self.data = self.data.drop(self.date_column_list, axis=1)
+             self._display_msg(msg='List of newly generated features from existing date features:',
+                               col_lst=new_columns,
+                               progress_bar=self.progress_bar)
+             self._display_msg(msg='Updated dataset sample with newly generated date component features:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+
+             drop_cols = []
+
+             for col in new_columns:
+                 distinct_rows = self.data.drop_duplicate(col).size
+                 if distinct_rows == self.data.shape[0]:
+                     drop_cols.append(col)
+                     self._display_msg(msg='Dropping features with all unique values:',
+                                       col_lst=col,
+                                       progress_bar=self.progress_bar)
+
+                 elif distinct_rows == 1:
+                     drop_cols.append(col)
+                     self._display_msg(msg='Dropping features with a single value:',
+                                       col_lst=col,
+                                       progress_bar=self.progress_bar)
+
+             # Dropping columns from drop_cols list
+             if len(drop_cols) != 0:
+                 self.data = self.data.drop(drop_cols, axis=1)
+                 # Storing extracted date component list for drop in data transform dictionary
+                 self.data_transform_dict['drop_extract_date_columns'] = drop_cols
+
+             # Extracting all newly generated columns
+             new_columns = [item for item in new_columns if item not in drop_cols]
+
+             # Storing each date component transformation fit object in data transform dictionary
+             self.data_transform_dict = {**self.data_transform_dict,
+                                         'day_component_fit_object': {},
+                                         'month_component_fit_object': {},
+                                         'year_diff_component_fit_object': {}}
+             # Grouping date components based on type, i.e., day, month, and year_diff, for binning
+             if len(new_columns) != 0:
+                 self.day_columns = self._fetch_date_component("day", "_day_comp", new_columns, "D")
+                 self.month_columns = self._fetch_date_component("month", "_month_comp", new_columns, "M")
+                 self.year_diff_columns = self._fetch_date_component("year_diff", "_year_diff_comp", new_columns, "Y")
+             else:
+                 self._display_msg(inline_msg="No useful date component found",
+                                   progress_bar=self.progress_bar)
+
+             self._display_msg(msg='Updated dataset sample after handling date features:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="No useful date feature found",
+                               progress_bar=self.progress_bar)
+
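# --- Illustrative sketch (editor's example, not part of the package diff) ---
# The date decomposition for one hypothetical DATE column "order_date":
# literal_column injects the SQL EXTRACT expressions that assign() then
# materializes as new integer columns.
from sqlalchemy import literal_column
from teradatasqlalchemy import INTEGER

params = {
    "order_date_day_comp": literal_column("EXTRACT(DAY FROM order_date)", INTEGER()),
    "order_date_month_comp": literal_column("EXTRACT(MONTH FROM order_date)", INTEGER()),
    "order_date_year_diff_comp": literal_column(
        "EXTRACT(YEAR FROM CURRENT_DATE) - EXTRACT(YEAR FROM order_date)", INTEGER()),
}
df = df.assign(**params).drop(["order_date"], axis=1)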
+     def _handle_date_columns(self):
+
+         """
+         DESCRIPTION:
+             Function to handle date columns in the dataset, if any, and perform the
+             relevant transformation by extracting the different components, i.e.,
+             Day, Month and Year.
+         """
+         self._display_msg(msg="\nHandling Date Features ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         self.date_column_list = [col for col, d_type in self.data._column_names_and_types \
+                                  if d_type in ["datetime.date", "datetime.datetime"]]
+
+         if len(self.date_column_list) == 0:
+             self._display_msg(inline_msg="Dataset does not contain any feature related to dates.",
+                               progress_bar=self.progress_bar)
+         else:
+             # Storing date column list in data transform dictionary
+             self.data_transform_dict['date_columns'] = self.date_column_list
+             self._handle_date_columns_helper()
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle date features: {:.2f} sec\n".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _missing_count_per_column(self):
+         """
+         DESCRIPTION:
+             Function finds and returns a dictionary of columns with missing values.
+
+         Returns:
+             dict, keys represent column names and
+             values represent the missing value count for the corresponding column.
+         """
+
+         # Removing rows with a missing target column value
+         self.data = self.data.dropna(subset=[self.target_column])
+
+         obj = ColumnSummary(data=self.data,
+                             target_columns=self.data.columns,
+                             volatile=True)
+
+         cols_miss_val = {}
+         # Iterating over each row in the column summary result
+         for row in obj.result.itertuples():
+             # Checking if the third element of the row (missing values count) is greater than 0
+             if row[3] > 0:
+                 # If so, add an entry to the 'cols_miss_val' dictionary
+                 # Key: column name (first element of the row)
+                 # Value: count of missing values in the column (third element of the row)
+                 cols_miss_val[row[0]] = row[3]
+
+         return cols_miss_val
+
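# --- Illustrative sketch (editor's example, not part of the package diff) ---
# Per-column missing counts via ColumnSummary; as indexed above, row[0] is the
# column name and row[3] the null count.
cs = ColumnSummary(data=df, target_columns=df.columns, volatile=True)
missing = {row[0]: row[3] for row in cs.result.itertuples() if row[3] > 0}
print(missing)   # e.g. {'age': 37, 'city': 12} (values illustrative)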
+     def _handling_missing_value(self):
+         """
+         DESCRIPTION:
+             Function detects the missing values in each feature of the dataset,
+             then performs one of these operations based on condition:
+                 1. deleting rows with missing values in a column/feature
+                 2. dropping columns from the dataset
+         """
+         self._display_msg(msg="\nChecking Missing values in dataset ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         # Flag for missing values
+         msg_val_found = 0
+
+         # Number of rows
+         d_size = self.data.shape[0]
+
+         delete_rows = []
+         drop_cols = []
+         self.imputation_cols = {}
+
+         cols_miss_val = self._missing_count_per_column()
+
+         if len(cols_miss_val) != 0:
+             self._display_msg(msg="Columns with their missing values:",
+                               col_lst=cols_miss_val,
+                               progress_bar=self.progress_bar)
+
+             # Get distinct values in each column
+             self._get_distinct_count()
+
+             # Iterating over columns with missing values
+             for col, val in cols_miss_val.items():
+
+                 # Drop column, if count of missing values > 60%
+                 if val > .6 * d_size:
+                     drop_cols.append(col)
+                     continue
+
+                 if self.data_types[col] in ['float', 'int']:
+                     corr_df = self.data[col].corr(self.data[self.target_column])
+                     corr_val = self.data.assign(True, corr_=corr_df)
+                     related = next(corr_val.itertuples())[0]
+
+                     # Delete rows, if count of missing values < 2% and
+                     # correlation between target column and numeric column <= .25
+                     if val < .02 * d_size and related <= .25:
+                         delete_rows.append(col)
+                         continue
+
+                 elif self.data_types[col] in ['str']:
+                     # Delete rows, if count of missing values < 4%
+                     if val < .04 * d_size:
+                         delete_rows.append(col)
+                         continue
+                     # Drop column, if unique count of column > 75%
+                     elif self.counts_dict[f'count_{col}'] > .75 * (d_size - val):
+                         drop_cols.append(col)
+                         continue
+
+                 # Remaining column for imputation
+                 self.imputation_cols[col] = val
+             # Storing columns with missing values for imputation in data transform dictionary
+             self.data_transform_dict['imputation_columns'] = self.imputation_cols
+
+             if len(delete_rows) != 0:
+                 self.data = self.data.dropna(subset=delete_rows)
+                 msg_val_found = 1
+                 self._display_msg(msg='Deleting rows of these columns for handling missing values:',
+                                   col_lst=delete_rows,
+                                   progress_bar=self.progress_bar)
+
+             if len(drop_cols) != 0:
+                 self.data = self.data.drop(drop_cols, axis=1)
+                 msg_val_found = 1
+                 # Storing columns with missing values for drop in data transform dictionary
+                 self.data_transform_dict['drop_missing_columns'] = drop_cols
+                 self._display_msg(msg='Dropping these columns for handling missing values:',
+                                   col_lst=drop_cols,
+                                   progress_bar=self.progress_bar)
+
+         if len(self.imputation_cols) == 0 and msg_val_found == 0:
+             self._display_msg(inline_msg="No Missing Values Detected.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to find missing values in data: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
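# --- Worked example (editor's note, not part of the package diff) ---
# The thresholds above, applied to a hypothetical 10,000-row dataset:
#   * a column with 6,500 missing values (> 60%) is dropped outright;
#   * a numeric column with 150 missing values (< 2%) whose correlation with
#     the target is <= 0.25 has those rows deleted;
#   * a string column with 350 missing values (< 4%) has those rows deleted;
#   * a string column whose distinct count exceeds 75% of its non-missing
#     rows is dropped;
#   * anything else is queued for imputation.
d_size = 10_000
assert 6_500 > 0.6 * d_size       # drop column
assert 150 < 0.02 * d_size        # delete rows (numeric, weak correlation)
assert 350 < 0.04 * d_size        # delete rows (string)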
+     def _impute_helper(self):
+         """
+         DESCRIPTION:
+             Function decides the imputation method [mean/median/mode] for columns with
+             missing values on the basis of the skewness of the column in the dataset.
+
+         RETURNS:
+             A tuple containing,
+                 col_stat (names of columns with missing values)
+                 stat (imputation method for the respective columns)
+         """
+         col_stat = []
+         stat = []
+
+         # Converting output of skew() into a dictionary with the column name as key and skewness value as value
+         df = self.data.skew()
+         skew_data = next(df.itertuples())._asdict()
+
+         # Iterating over columns with missing values
+         for key, val in self.imputation_cols.items():
+
+             col_stat.append(key)
+             if self.data_types[key] in ['float', 'int']:
+                 val = skew_data[f'skew_{key}']
+                 # Median imputation method, if abs(skewness value) > 1
+                 if abs(val) > 1:
+                     stat.append('median')
+                 # Mean imputation method, if abs(skewness value) <= 1
+                 else:
+                     stat.append('mean')
+             # Mode imputation method, if categorical column
+             else:
+                 stat.append('mode')
+
+         self._display_msg(msg="Columns with their imputation method:",
+                           col_lst=dict(zip(col_stat, stat)),
+                           progress_bar=self.progress_bar)
+
+         return col_stat, stat
+
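# --- Worked example (editor's note, not part of the package diff) ---
# The skew rule above in miniature: |skew| > 1 selects median, otherwise mean;
# non-numeric columns always get mode.
def pick_method(dtype, skew_val=None):
    if dtype in ("float", "int"):
        return "median" if abs(skew_val) > 1 else "mean"
    return "mode"

print(pick_method("float", 2.3))   # 'median' (heavily skewed)
print(pick_method("int", -0.4))    # 'mean'   (roughly symmetric)
print(pick_method("str"))          # 'mode'   (categorical)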
+     def _impute_missing_value(self):
+         """
+         DESCRIPTION:
+             Function performs the imputation on columns/features with missing values in the dataset.
+         """
+
+         start_time = time.time()
+         self._display_msg(msg="\nImputing Missing Values ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+         if len(self.imputation_cols) != 0:
+
+             # List of columns and imputation methods
+             col_stat, stat = self._impute_helper()
+
+             fit_obj = SimpleImputeFit(data=self.data,
+                                       stats_columns=col_stat,
+                                       stats=stat,
+                                       volatile=True)
+
+             # Storing fit object for imputation in data transform dictionary
+             self.data_transform_dict['imputation_fit_object'] = fit_obj.output
+             sm = SimpleImputeTransform(data=self.data,
+                                        object=fit_obj,
+                                        volatile=True)
+
+             self.data = sm.result
+             self._display_msg(msg="Sample of Data after Imputation:",
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="No imputation is required.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Time taken to perform imputation: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+
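# --- Illustrative sketch (editor's example, not part of the package diff) ---
# The fit/transform imputation pair in isolation; column names and methods
# are illustrative.
fit = SimpleImputeFit(data=df,
                      stats_columns=["age", "city"],
                      stats=["median", "mode"],
                      volatile=True)
df = SimpleImputeTransform(data=df, object=fit, volatile=True).result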
+     def _custom_handling_missing_value(self):
+         """
+         DESCRIPTION:
+             Function to perform customized missing value handling on features based on user input.
+
+         """
+         # Fetching user input for performing missing value handling
+         missing_handling_input = self.custom_data.get("MissingValueHandlingIndicator", False)
+
+         if missing_handling_input:
+             # Fetching parameters required for performing it
+             missing_handling_param = self.custom_data.get("MissingValueHandlingParam", None)
+             if missing_handling_param:
+                 # Fetching user input for the different missing value handling methods
+                 drop_col_ind = missing_handling_param.get("DroppingColumnIndicator", False)
+                 drop_row_ind = missing_handling_param.get("DroppingRowIndicator", False)
+                 impute_ind = missing_handling_param.get("ImputeMissingIndicator", False)
+                 # Checking whether all method indicators in the user input are false
+                 if not any([drop_col_ind, drop_row_ind, impute_ind]):
+                     self._display_msg(inline_msg="No method information provided for performing customized missing value handling. \
+                                       AutoML will proceed with default missing value handling method.",
+                                       progress_bar=self.progress_bar)
+
+                 else:
+                     # Checking user input for dropping missing value columns
+                     if drop_col_ind:
+                         drop_col_list = missing_handling_param.get("DroppingColumnList", [])
+                         # Storing custom columns with missing values for drop in data transform dictionary
+                         self.data_transform_dict["custom_drop_missing_columns"] = drop_col_list
+                         if len(drop_col_list):
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(drop_col_list, "DroppingColumnList", self.data, "df")
+
+                             self._display_msg(msg="\nDropping these columns for handling customized missing value:",
+                                               col_lst=drop_col_list,
+                                               progress_bar=self.progress_bar)
+                             self.data = self.data.drop(drop_col_list, axis=1)
+                         else:
+                             self._display_msg(inline_msg="No information provided for dropping columns containing missing values.",
+                                               progress_bar=self.progress_bar)
+
+                     # Checking user input for dropping missing value rows
+                     if drop_row_ind:
+                         drop_row_list = missing_handling_param.get("DroppingRowList", [])
+                         if len(drop_row_list):
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(drop_row_list, "DroppingRowList", self.data, "df")
+
+                             self._display_msg(msg="Dropping missing rows in these columns for handling customized missing value:",
+                                               col_lst=drop_row_list,
+                                               progress_bar=self.progress_bar)
+                             self.data = self.data.dropna(subset=drop_row_list)
+                         else:
+                             self._display_msg(inline_msg="No information provided for dropping rows containing missing values.",
+                                               progress_bar=self.progress_bar)
+                     # Checking user input for missing value imputation
+                     if impute_ind:
+                         stat_list = missing_handling_param.get("StatImputeList", None)
+                         stat_method = missing_handling_param.get("StatImputeMethod", None)
+                         literal_list = missing_handling_param.get("LiteralImputeList", None)
+                         literal_value = missing_handling_param.get("LiteralImputeValue", None)
+
+                         # Checking whether the columns are present in the dataset
+                         _Validators._validate_dataframe_has_argument_columns(stat_list, "StatImputeList", self.data, "df")
+
+                         _Validators._validate_dataframe_has_argument_columns(literal_list, "LiteralImputeList", self.data, "df")
+
+                         # Creating fit params
+                         fit_param = {
+                             "data": self.data,
+                             "stats_columns": stat_list,
+                             "stats": stat_method,
+                             "literals_columns": literal_list,
+                             "literals": literal_value
+                         }
+                         # Fitting on dataset
+                         fit_obj = SimpleImputeFit(**fit_param)
+                         # Storing custom fit object for imputation in data transform dictionary
+                         self.data_transform_dict["custom_imputation_ind"] = True
+                         self.data_transform_dict["custom_imputation_fit_object"] = fit_obj.output
+                         # Creating transform params
+                         transform_param = {
+                             "data": self.data,
+                             "object": fit_obj.output,
+                             "persist": True
+                         }
+                         # Updating dataset with transform result
+                         self.data = SimpleImputeTransform(**transform_param).result
+                         self._display_msg(msg="Updated dataset sample after performing customized missing value imputation:",
+                                           data=self.data,
+                                           progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No information provided for performing customized missing value handling. \
+                                   AutoML will proceed with default missing value handling method.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Proceeding with default option for missing value imputation.",
+                               progress_bar=self.progress_bar)
+
+         # Proceeding with default method for handling remaining missing values
+         self._display_msg(inline_msg="Proceeding with default option for handling remaining missing values.",
+                           progress_bar=self.progress_bar)
+         self._handling_missing_value()
+         self._impute_missing_value()
+
918
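For orientation, here is a minimal standalone sketch of the SimpleImpute flow the branch above delegates to. It assumes a connected teradataml session (the analytic classes become available after create_context()); the table name, column names, and imputation choices are illustrative, not taken from the package:

    # A hedged sketch of statistic- and literal-based imputation.
    from teradataml import DataFrame, SimpleImputeFit, SimpleImputeTransform

    df = DataFrame("my_input_table")                    # hypothetical table
    fit_obj = SimpleImputeFit(data=df,
                              stats_columns="age",      # hypothetical column, statistic-imputed
                              stats="median",
                              literals_columns="city",  # hypothetical column, literal-imputed
                              literals="unknown")
    df = SimpleImputeTransform(data=df, object=fit_obj.output).result

The same four inputs map onto the custom JSON keys read above: StatImputeList/StatImputeMethod and LiteralImputeList/LiteralImputeValue.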
+    def _bin_code_transformation(self):
+        """
+        DESCRIPTION:
+            Function to perform customized binning on features based on user input.
+
+        """
+        # Fetching user input for performing bin code transformation.
+        bin_code_input = self.custom_data.get("BincodeIndicator", False)
+
+        if bin_code_input:
+            # Storing custom bin code transformation indicator in data transform dictionary
+            self.data_transform_dict['custom_bincode_ind'] = True
+            # Fetching list required for performing transformation.
+            extracted_col = self.custom_data.get("BincodeParam", None)
+            if not extracted_col:
+                self._display_msg(inline_msg="BincodeParam is empty. Skipping customized bincode transformation.",
+                                  progress_bar=self.progress_bar)
+            else:
+                # Creating lists for storing column and binning information for performing transformation
+                equal_width_bin_list = []
+                equal_width_bin_columns = []
+                var_width_bin_list = []
+                var_width_bin_columns = []
+
+                # Checking whether the columns are present in the dataset
+                _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "BincodeParam", self.data, "df")
+
+                for col, transform_val in extracted_col.items():
+                    # Fetching type of binning to be performed
+                    bin_trans_type = transform_val["Type"]
+                    # Fetching number of bins to be created
+                    num_bin = transform_val["NumOfBins"]
+                    # Checking for bin types and adding details into lists for binning
+                    if bin_trans_type == "Equal-Width":
+                        bins = num_bin
+                        equal_width_bin_list.append(bins)
+                        equal_width_bin_columns.append(col)
+                    elif bin_trans_type == "Variable-Width":
+                        var_width_bin_columns.append(col)
+                        bins = num_bin
+                        for i in range(1, bins + 1):
+                            # Forming bin name as per expected input
+                            temp = "Bin_" + str(i)
+                            # Fetching required details for variable-width binning
+                            minval = transform_val[temp]["min_value"]
+                            maxval = transform_val[temp]["max_value"]
+                            label = transform_val[temp]["label"]
+                            # Appending information of each bin
+                            var_width_bin_list.append({"ColumnName" : col, "MinValue" : minval, "MaxValue" : maxval, "Label" : label})
+                # Checking column list for performing binning with Equal-Width.
+                if len(equal_width_bin_columns) != 0:
+                    # Adding fit parameters for performing binning with Equal-Width.
+                    # Passing one bin count per target column instead of reusing the
+                    # last value of the loop variable "bins".
+                    fit_params = {
+                        "data" : self.data,
+                        "target_columns" : equal_width_bin_columns,
+                        "method_type" : "Equal-Width",
+                        "nbins" : equal_width_bin_list
+                    }
+                    eql_bin_code_fit = BincodeFit(**fit_params)
+                    # Storing fit object and column list for Equal-Width binning in data transform dictionary
+                    self.data_transform_dict['custom_eql_bincode_col'] = equal_width_bin_columns
+                    self.data_transform_dict['custom_eql_bincode_fit_object'] = eql_bin_code_fit.output
+                    # Extracting accumulate columns
+                    accumulate_columns = self._extract_list(self.data.columns, equal_width_bin_columns)
+                    # Adding transform parameters for performing binning with Equal-Width.
+                    eql_transform_params = {
+                        "data" : self.data,
+                        "object" : eql_bin_code_fit.output,
+                        "accumulate" : accumulate_columns,
+                        "persist" : True,
+                    }
+                    self.data = BincodeTransform(**eql_transform_params).result
+                    self._display_msg(msg="\nUpdated dataset sample after performing Equal-Width binning:",
+                                      data=self.data,
+                                      progress_bar=self.progress_bar)
+                else:
+                    self._display_msg(inline_msg="No information provided for Equal-Width Transformation.",
+                                      progress_bar=self.progress_bar)
+
+                if len(var_width_bin_columns) != 0:
+                    # Creating pandas DataFrame and then teradataml DataFrame for storing binning information
+                    var_bin_table = pd.DataFrame(var_width_bin_list, columns=["ColumnName", "MinValue", "MaxValue", "Label"])
+                    self._display_msg(msg="Variable-Width binning information:",
+                                      data=var_bin_table,
+                                      progress_bar=self.progress_bar)
+                    copy_to_sql(df=var_bin_table, table_name="automl_bincode_var_fit", temporary=True)
+                    var_fit_input = DataFrame.from_table("automl_bincode_var_fit")
+                    fit_params = {
+                        "data" : self.data,
+                        "fit_data" : var_fit_input,
+                        "fit_data_order_column" : ["MinValue", "MaxValue"],
+                        "target_columns" : var_width_bin_columns,
+                        "minvalue_column" : "MinValue",
+                        "maxvalue_column" : "MaxValue",
+                        "label_column" : "Label",
+                        "method_type" : "Variable-Width",
+                        "label_prefix" : "label_prefix"
+                    }
+                    var_bin_code_fit = BincodeFit(**fit_params)
+                    # Storing fit object and column list for Variable-Width binning in data transform dictionary
+                    self.data_transform_dict['custom_var_bincode_col'] = var_width_bin_columns
+                    self.data_transform_dict['custom_var_bincode_fit_object'] = var_bin_code_fit.output
+                    accumulate_columns = self._extract_list(self.data.columns, var_width_bin_columns)
+                    var_transform_params = {
+                        "data" : self.data,
+                        "object" : var_bin_code_fit.output,
+                        "object_order_column" : "TD_MinValue_BINFIT",
+                        "accumulate" : accumulate_columns,
+                        "persist" : True
+                    }
+                    self.data = BincodeTransform(**var_transform_params).result
+                    self._display_msg(msg="Updated dataset sample after performing Variable-Width binning:",
+                                      data=self.data,
+                                      progress_bar=self.progress_bar)
+                else:
+                    self._display_msg(inline_msg="No information provided for Variable-Width Transformation.",
+                                      progress_bar=self.progress_bar)
+        else:
+            self._display_msg(inline_msg="Skipping customized bincode transformation.",
+                              progress_bar=self.progress_bar)
+
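The loop above expects BincodeParam to map each column to a Type, a NumOfBins, and, for Variable-Width binning, one Bin_i entry per bin. A hedged sketch of such a value (the column names and bin edges are invented for illustration):

    bincode_param = {
        "income": {"Type": "Equal-Width", "NumOfBins": 5},        # hypothetical column
        "age": {                                                  # hypothetical column
            "Type": "Variable-Width",
            "NumOfBins": 2,
            "Bin_1": {"min_value": 0,  "max_value": 40,  "label": "young"},
            "Bin_2": {"min_value": 41, "max_value": 120, "label": "senior"}
        }
    }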
+    def _string_manipulation(self):
+        """
+        DESCRIPTION:
+            Function to perform customized string manipulations on categorical features based on user input.
+
+        """
+        # Fetching user input for performing string manipulation.
+        str_mnpl_input = self.custom_data.get("StringManipulationIndicator", False)
+        # Checking user input for string manipulation on categorical features.
+        if str_mnpl_input:
+            # Storing custom string manipulation indicator in data transform dictionary
+            self.data_transform_dict['custom_string_manipulation_ind'] = True
+            # Fetching list required for performing operation.
+            extracted_col = self.custom_data.get("StringManipulationParam", None)
+            if not extracted_col:
+                self._display_msg(inline_msg="No information provided for performing string manipulation.",
+                                  progress_bar=self.progress_bar)
+            else:
+                # Checking whether the columns are present in the dataset
+                _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "StringManipulationParam", self.data, "df")
+
+                for target_col, transform_val in extracted_col.items():
+                    self.data = self._str_method_mapping(target_col, transform_val)
+                # Storing custom string manipulation parameters in data transform dictionary
+                self.data_transform_dict['custom_string_manipulation_param'] = extracted_col
+
+                self._display_msg(msg="Updated dataset sample after performing string manipulation:",
+                                  data=self.data,
+                                  progress_bar=self.progress_bar)
+        else:
+            self._display_msg(inline_msg="Skipping customized string manipulation.",
+                              progress_bar=self.progress_bar)
+
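_str_method_mapping below expects each StringManipulationParam entry to carry a StringOperation plus the operation-specific keys it reads (String, StringLength, StartIndex). A hedged sketch with invented column names:

    string_manipulation_param = {
        "city": {"StringOperation": "StringTrim", "String": " "},
        "name": {"StringOperation": "StringPad", "String": "*", "StringLength": 10},
        "code": {"StringOperation": "Substring", "StartIndex": 1, "StringLength": 3}
    }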
+    def _str_method_mapping(self,
+                            target_col,
+                            transform_val):
+        """
+        DESCRIPTION:
+            Function to map customized parameters according to the passed method and
+            perform string manipulation on categorical features.
+
+        PARAMETERS:
+            target_col:
+                Required Argument.
+                Specifies the feature for applying string manipulation.
+                Types: str
+
+            transform_val:
+                Required Argument.
+                Specifies the different parameters required for applying string manipulation.
+                Types: dict
+
+        RETURNS:
+            DataFrame containing transformed data after applying string manipulation.
+
+        """
+        # Creating list of features to accumulate while performing string manipulation on certain features
+        accumulate_columns = self._extract_list(self.data.columns, [target_col])
+
+        # Fetching required parameters from json object
+        string_operation = transform_val["StringOperation"]
+
+        # Storing general parameters for performing string transformation
+        fit_params = {
+            "data" : self.data,
+            "target_columns" : target_col,
+            "string_operation" : string_operation,
+            "accumulate" : accumulate_columns,
+            "inplace" : True,
+            "persist" : True
+        }
+        # Adding additional parameters based on string operation type
+        if string_operation in ["StringCon", "StringTrim"]:
+            string_argument = transform_val["String"]
+            fit_params = {**fit_params,
+                          "string" : string_argument}
+        elif string_operation == "StringPad":
+            string_argument = transform_val["String"]
+            string_length = transform_val["StringLength"]
+            fit_params = {**fit_params,
+                          "string" : string_argument,
+                          "string_length" : string_length}
+        elif string_operation == "Substring":
+            string_index = transform_val["StartIndex"]
+            string_length = transform_val["StringLength"]
+            fit_params = {**fit_params,
+                          "start_index" : string_index,
+                          "string_length" : string_length}
+
+        # Returning dataset after performing string manipulation
+        return StrApply(**fit_params).result
+
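A minimal direct StrApply call equivalent to one pass of the mapper above, assuming a connected session; the table and column names are hypothetical:

    from teradataml import DataFrame, StrApply

    df = DataFrame("my_input_table")          # hypothetical table
    df = StrApply(data=df,
                  target_columns="city",      # hypothetical column
                  string_operation="StringTrim",
                  string=" ",
                  accumulate=[c for c in df.columns if c != "city"],
                  inplace=True,
                  persist=True).result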
+    def _one_hot_encoding(self,
+                          one_hot_columns,
+                          unique_counts):
+        """
+        DESCRIPTION:
+            Function performs one hot encoding on categorical columns/features in the dataset.
+
+        PARAMETERS:
+            one_hot_columns:
+                Required Argument.
+                Specifies the categorical columns for which one hot encoding will be performed.
+                Types: str or list of strings (str)
+
+            unique_counts:
+                Required Argument.
+                Specifies the unique counts of the categorical columns.
+                Types: int or list of integers (int)
+
+        """
+        # The TD function adds an extra "<column>_other" column in OneHotEncoding, so
+        # initializing this list to remove those extra columns
+        drop_lst = [ele + "_other" for ele in one_hot_columns]
+        # Adding fit parameters for performing encoding
+        fit_params = {
+            "data" : self.data,
+            "approach" : "auto",
+            "is_input_dense" : True,
+            "target_column" : one_hot_columns,
+            "category_counts" : unique_counts,
+            "other_column" : "other"
+        }
+        # Performing one hot encoding fit on target columns
+        fit_obj = OneHotEncodingFit(**fit_params)
+        # Storing indicator, fit object and column drop list for one hot encoding in data transform dictionary
+        self.data_transform_dict['one_hot_encoding_ind'] = True
+        self.data_transform_dict['one_hot_encoding_fit_obj'].update({self.one_hot_obj_count : fit_obj.result})
+        self.data_transform_dict['one_hot_encoding_drop_list'].extend(drop_lst)
+        self.one_hot_obj_count = self.one_hot_obj_count + 1
+        # Adding transform parameters for performing encoding
+        transform_params = {
+            "data" : self.data,
+            "object" : fit_obj.result,
+            "is_input_dense" : True,
+            "persist" : True
+        }
+        # Performing one hot encoding transformation
+        transform_obj = OneHotEncodingTransform(**transform_params)
+        self.data = transform_obj.result.drop(drop_lst, axis=1)
+
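A standalone sketch of the same fit/transform/drop sequence, assuming a connected session and a hypothetical column "city" with three distinct values:

    from teradataml import DataFrame, OneHotEncodingFit, OneHotEncodingTransform

    df = DataFrame("my_input_table")                  # hypothetical table
    fit_obj = OneHotEncodingFit(data=df,
                                approach="auto",
                                is_input_dense=True,
                                target_column="city",
                                category_counts=3,
                                other_column="other")
    encoded = OneHotEncodingTransform(data=df,
                                      object=fit_obj.result,
                                      is_input_dense=True).result
    # As in the method above, the surplus "<column>_other" column is dropped.
    encoded = encoded.drop(["city_other"], axis=1)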
+    def _ordinal_encoding(self,
+                          ordinal_columns):
+        """
+        DESCRIPTION:
+            Function performs ordinal encoding on categorical columns or features in the dataset.
+
+        PARAMETERS:
+            ordinal_columns:
+                Required Argument.
+                Specifies the categorical columns for which ordinal encoding will be performed.
+                Types: str or list of strings (str)
+        """
+        # Adding fit parameters for performing encoding
+        fit_params = {
+            "data" : self.data,
+            "target_column" : ordinal_columns,
+            "volatile" : True
+        }
+        # Performing ordinal encoding fit on target columns
+        ord_fit_obj = OrdinalEncodingFit(**fit_params)
+        # Storing fit object and column list for ordinal encoding in data transform dictionary
+        if ordinal_columns[0] != self.target_column:
+            self.data_transform_dict["custom_ord_encoding_fit_obj"] = ord_fit_obj.result
+            self.data_transform_dict['custom_ord_encoding_col'] = ordinal_columns
+        else:
+            self.data_transform_dict['target_col_encode_ind'] = True
+            self.data_transform_dict['target_col_ord_encoding_fit_obj'] = ord_fit_obj.result
+        # Extracting accumulate columns
+        accumulate_columns = self._extract_list(self.data.columns, ordinal_columns)
+        # Adding transform parameters for performing encoding
+        transform_params = {
+            "data" : self.data,
+            "object" : ord_fit_obj.result,
+            "accumulate" : accumulate_columns,
+            "persist" : True
+        }
+        # Performing ordinal encoding transformation
+        self.data = OrdinalEncodingTransform(**transform_params).result
+
+        if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
+            self.target_label = ord_fit_obj
+
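The equivalent standalone OrdinalEncoding sequence, as a sketch with hypothetical table and column names:

    from teradataml import DataFrame, OrdinalEncodingFit, OrdinalEncodingTransform

    df = DataFrame("my_input_table")            # hypothetical table
    fit_obj = OrdinalEncodingFit(data=df,
                                 target_column="grade",   # hypothetical column
                                 volatile=True)
    df = OrdinalEncodingTransform(data=df,
                                  object=fit_obj.result,
                                  accumulate=[c for c in df.columns if c != "grade"]).result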
+    def _target_encoding(self,
+                         target_encoding_list):
+        """
+        DESCRIPTION:
+            Function performs target encoding on categorical columns/features in the dataset.
+
+        PARAMETERS:
+            target_encoding_list:
+                Required Argument.
+                Specifies a dictionary mapping each categorical column to the
+                parameters used for its target encoding.
+                Types: dict
+        """
+        # Fetching all columns on which target encoding will be performed.
+        target_columns = list(target_encoding_list.keys())
+        # Checking whether the columns are present in the dataset
+        _Validators._validate_dataframe_has_argument_columns(target_columns, "TargetEncodingList", self.data, "df")
+        # Finding distinct values and counts for columns.
+        cat_sum = CategoricalSummary(data=self.data,
+                                     target_columns=target_columns)
+        category_data = cat_sum.result.groupby("ColumnName").count()
+        category_data = category_data.assign(drop_columns=True,
+                                             ColumnName=category_data.ColumnName,
+                                             CategoryCount=category_data.count_DistinctValue)
+        # Storing indicator and fit objects for target encoding in data transform dictionary
+        self.data_transform_dict["custom_target_encoding_ind"] = True
+        self.data_transform_dict["custom_target_encoding_fit_obj"] = {}
+        # Fetching required arguments for performing target encoding
+        for col, transform_val in target_encoding_list.items():
+            encoder_method = transform_val["encoder_method"]
+            response_column = transform_val["response_column"]
+            # Adding fit parameters for performing encoding
+            fit_params = {
+                "data" : self.data,
+                "category_data" : category_data,
+                "encoder_method" : encoder_method,
+                "target_columns" : col,
+                "response_column" : response_column
+            }
+            if encoder_method == "CBM_DIRICHLET":
+                num_distinct_responses = transform_val["num_distinct_responses"]
+                fit_params = {**fit_params,
+                              "num_distinct_responses" : num_distinct_responses}
+            # Performing target encoding fit on target columns
+            tar_fit_obj = TargetEncodingFit(**fit_params)
+            # Storing each column's fit object for target encoding in data transform dictionary
+            self.data_transform_dict["custom_target_encoding_fit_obj"].update({col : tar_fit_obj})
+            # Extracting accumulate columns
+            accumulate_columns = self._extract_list(self.data.columns, [col])
+            # Adding transform parameters for performing encoding
+            transform_params = {
+                "data" : self.data,
+                "object" : tar_fit_obj,
+                "accumulate" : accumulate_columns,
+                "persist" : True
+            }
+            # Performing target encoding transformation
+            self.data = TargetEncodingTransform(**transform_params).result
+
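A hedged standalone sketch of the same target-encoding sequence, including the CategoricalSummary-derived category_data input the fit requires; the table, column, and response names are invented:

    from teradataml import (CategoricalSummary, DataFrame,
                            TargetEncodingFit, TargetEncodingTransform)

    df = DataFrame("my_input_table")                # hypothetical table
    cat_sum = CategoricalSummary(data=df, target_columns="state")
    category_data = cat_sum.result.groupby("ColumnName").count()
    category_data = category_data.assign(drop_columns=True,
                                         ColumnName=category_data.ColumnName,
                                         CategoryCount=category_data.count_DistinctValue)
    fit_obj = TargetEncodingFit(data=df,
                                category_data=category_data,
                                encoder_method="CBM_DIRICHLET",
                                target_columns="state",        # hypothetical column
                                response_column="label",       # hypothetical response
                                num_distinct_responses=2)
    df = TargetEncodingTransform(data=df,
                                 object=fit_obj,
                                 accumulate=[c for c in df.columns if c != "state"]).result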
+    def _encoding_categorical_columns(self):
+        """
+        DESCRIPTION:
+            Function detects the categorical columns and performs encoding on categorical columns in the dataset.
+        """
+        self._display_msg(msg="\nPerforming encoding for categorical columns ...",
+                          progress_bar=self.progress_bar,
+                          show_data=True)
+        start_time = time.time()
+
+        ohe_col = []
+        unique_count = []
+
+        # List of columns before one hot encoding
+        col_bf_ohe = self.data.columns
+
+        # Get distinct value count in each column
+        self._get_distinct_count()
+
+        # Detecting categorical columns with their unique counts
+        for col, d_type in self.data._column_names_and_types:
+            if d_type in ['str']:
+                ohe_col.append(col)
+                unique_count.append(self.counts_dict[f'count_{col}'])
+
+        if len(ohe_col) != 0:
+            self._one_hot_encoding(ohe_col, unique_count)
+
+            self._display_msg(msg="ONE HOT Encoding these Columns:",
+                              col_lst=ohe_col,
+                              progress_bar=self.progress_bar)
+        else:
+            self._display_msg(inline_msg="Encoding not required.",
+                              progress_bar=self.progress_bar)
+
+        # List of columns after one hot encoding
+        col_af_ohe = self.data.columns
+
+        # List of columns excluded from outlier processing and scaling
+        self.excluded_cols = self._extract_list(col_af_ohe, col_bf_ohe)
+
+        end_time = time.time()
+        self._display_msg(msg="Time taken to encode the columns: {:.2f} sec".format(end_time - start_time),
+                          progress_bar=self.progress_bar,
+                          show_data=True)
+
+    def _custom_categorical_encoding(self):
+        """
+        DESCRIPTION:
+            Function to perform specific encoding on the categorical columns based on user input.
+            If validation fails, default encoding is performed on all remaining categorical columns.
+        """
+        self._display_msg(msg="\nStarting Customized Categorical Feature Encoding ...",
+                          progress_bar=self.progress_bar)
+        cat_end_input = self.custom_data.get("CategoricalEncodingIndicator", False)
+        # Checking user input for categorical encoding
+        if cat_end_input:
+            # Storing custom categorical encoding indicator in data transform dictionary
+            self.data_transform_dict["custom_categorical_encoding_ind"] = True
+            # Fetching user input list for performing encoding
+            encoding_list = self.custom_data.get("CategoricalEncodingParam", None)
+            if encoding_list:
+                onehot_encode_ind = encoding_list.get("OneHotEncodingIndicator", False)
+                ordinal_encode_ind = encoding_list.get("OrdinalEncodingIndicator", False)
+                target_encode_ind = encoding_list.get("TargetEncodingIndicator", False)
+                # Checking whether any categorical encoding technique indicator is set
+                if not any([onehot_encode_ind, ordinal_encode_ind, target_encode_ind]):
+                    self._display_msg(inline_msg="No information provided for any type of customized categorical encoding techniques. AutoML will proceed with default encoding technique.",
+                                      progress_bar=self.progress_bar)
+                else:
+                    if onehot_encode_ind:
+                        unique_count = []
+                        ohe_list = encoding_list.get("OneHotEncodingList", None)
+                        # Checking for empty list
+                        if not ohe_list:
+                            self._display_msg(inline_msg="No information provided for customized one hot encoding technique.",
+                                              progress_bar=self.progress_bar)
+                        else:
+                            # Checking whether the columns are present in the dataset
+                            _Validators._validate_dataframe_has_argument_columns(ohe_list, "OneHotEncodingList", self.data, "df")
+
+                            # Keeping track of existing columns before applying one hot encoding
+                            col_bf_ohe = self.data.columns
+                            # Detecting categorical columns with their unique counts
+                            for col in ohe_list:
+                                unique_count.append(self.data.drop_duplicate(col).size)
+                            # Performing one hot encoding
+                            self._one_hot_encoding(ohe_list, unique_count)
+                            # Keeping track of new columns after applying one hot encoding
+                            col_af_ohe = self.data.columns
+                            # Fetching list of columns on which outlier processing should not be applied
+                            self.excluded_cols.extend(self._extract_list(col_af_ohe, col_bf_ohe))
+
+                            self._display_msg(msg="Updated dataset sample after performing one hot encoding:",
+                                              data=self.data,
+                                              progress_bar=self.progress_bar)
+
+                    if ordinal_encode_ind:
+                        ord_list = encoding_list.get("OrdinalEncodingList", None)
+                        # Checking for empty list
+                        if not ord_list:
+                            self._display_msg(inline_msg="No information provided for customized ordinal encoding technique.",
+                                              progress_bar=self.progress_bar)
+                        else:
+                            # Checking whether the columns are present in the dataset
+                            _Validators._validate_dataframe_has_argument_columns(ord_list, "OrdinalEncodingList", self.data, "df")
+
+                            # Performing ordinal encoding
+                            self._ordinal_encoding(ord_list)
+                            self._display_msg(msg="Updated dataset sample after performing ordinal encoding:",
+                                              data=self.data,
+                                              progress_bar=self.progress_bar)
+
+                    if target_encode_ind:
+                        tar_list = encoding_list.get("TargetEncodingList", None)
+                        if not tar_list:
+                            self._display_msg(inline_msg="No information provided for customized target encoding technique.",
+                                              progress_bar=self.progress_bar)
+                        else:
+                            # Performing target encoding
+                            self._target_encoding(tar_list)
+                            self._display_msg(msg="Updated dataset sample after performing target encoding:",
+                                              data=self.data,
+                                              progress_bar=self.progress_bar)
+            else:
+                self._display_msg(inline_msg="No input provided for performing customized categorical encoding. AutoML will proceed with default encoding technique.",
+                                  progress_bar=self.progress_bar)
+        else:
+            self._display_msg(inline_msg="AutoML will proceed with default encoding technique.",
+                              progress_bar=self.progress_bar)
+
+        # Performing default encoding on remaining categorical columns
+        self._encoding_categorical_columns()
+
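Putting the three indicators together, a hedged sketch of a CategoricalEncodingParam value this method would accept (all column names invented):

    categorical_encoding_param = {
        "OneHotEncodingIndicator": True,
        "OneHotEncodingList": ["city"],
        "OrdinalEncodingIndicator": True,
        "OrdinalEncodingList": ["grade"],
        "TargetEncodingIndicator": True,
        "TargetEncodingList": {
            "state": {"encoder_method": "CBM_DIRICHLET",
                      "response_column": "label",
                      "num_distinct_responses": 2}
        }
    }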
+    def _numapply_transformation(self, target_col, transform_val):
+        """
+        DESCRIPTION:
+            Function to perform different numerical transformations using NumApply on numerical features based on user input.
+
+        """
+        # Fetching columns for accumulation
+        accumulate_columns = self._extract_list(self.data.columns, [target_col])
+        apply_method = transform_val["apply_method"]
+        # Adding fit parameters for performing transformation
+        fit_params = {
+            "data" : self.data,
+            "target_columns" : target_col,
+            "apply_method" : apply_method,
+            "inplace" : True,
+            "persist" : True,
+            "accumulate" : accumulate_columns
+        }
+        # Adding additional details to fit parameters in case of sigmoid transformation
+        if apply_method == "sigmoid":
+            sigmoid_style = transform_val["sigmoid_style"]
+            fit_params = {**fit_params, "sigmoid_style" : sigmoid_style}
+        # Performing transformation on target columns
+        return NumApply(**fit_params).result
+
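A direct NumApply call matching the sigmoid branch above, as a sketch; the table/column names and the "logit" style value are assumptions for illustration:

    from teradataml import DataFrame, NumApply

    df = DataFrame("my_input_table")          # hypothetical table
    df = NumApply(data=df,
                  target_columns="score",     # hypothetical column
                  apply_method="sigmoid",
                  sigmoid_style="logit",      # assumed style value
                  inplace=True,
                  accumulate=[c for c in df.columns if c != "score"]).result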
+    def _numerical_transformation(self, target_columns, num_transform_data):
+        """
+        DESCRIPTION:
+            Function to perform different numerical transformations using Fit and Transform on numerical features based on user input.
+
+        """
+        # Adding fit parameters for transformation
+        fit_params = {
+            "data" : self.data,
+            "object" : num_transform_data,
+            "object_order_column" : "TargetColumn"
+        }
+        # Performing fit with all arguments.
+        num_fit_obj = Fit(**fit_params)
+        # Fetching all numerical columns
+        numerical_columns = [col for col, d_type in self.data._column_names_and_types if d_type in ["int", "float"]]
+        # Extracting id columns, i.e., numerical columns the transformation should not affect
+        id_columns = self._extract_list(numerical_columns, target_columns)
+        # Storing fit object and id column list for numerical transformation in data transform dictionary
+        self.data_transform_dict['custom_numerical_transformation_fit_object'] = num_fit_obj.result
+        self.data_transform_dict['custom_numerical_transformation_id_columns'] = id_columns
+        # Adding transform parameters for transformation
+        transform_params = {
+            "data" : self.data,
+            "object" : num_fit_obj.result,
+            "id_columns" : id_columns,
+            "persist" : True
+        }
+        # Performing transformation on target columns
+        self.data = Transform(**transform_params).result
+        self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
+                          data=self.data,
+                          progress_bar=self.progress_bar)
+
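A minimal end-to-end sketch of the Fit/Transform pattern this method relies on, with the one-row-per-column specification table built the same way as in _mathematical_transformation below; all table and column names are hypothetical:

    import json
    import pandas as pd
    from teradataml import DataFrame, Fit, Transform, copy_to_sql

    spec = pd.DataFrame([
        {"TargetColumn": "age", "DefaultValue": 1,
         "Transformation": "log", "Parameters": json.dumps({"base": 10})},
        {"TargetColumn": "income", "DefaultValue": 1,
         "Transformation": "pow", "Parameters": json.dumps({"exponent": 2})},
    ])
    copy_to_sql(df=spec, table_name="my_transform_spec", temporary=True)
    df = DataFrame("my_input_table")
    fit_obj = Fit(data=df,
                  object=DataFrame("my_transform_spec"),
                  object_order_column="TargetColumn")
    df = Transform(data=df,
                   object=fit_obj.result,
                   id_columns=["id"]).result   # hypothetical id column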
+    def _mathematical_transformation(self):
+        """
+        DESCRIPTION:
+            Function to perform different mathematical transformations (i.e., log, pow,
+            exp, sininv, sigmoid) on numerical features based on user input.
+        """
+        self._display_msg(msg="\nStarting customized mathematical transformation ...",
+                          progress_bar=self.progress_bar,
+                          show_data=True)
+
+        mat_transform_input = self.custom_data.get("MathameticalTransformationIndicator", False)
+        # Checking user input for mathematical transformations
+        if mat_transform_input:
+            # Extracting list required for mathematical transformations
+            mat_transform_list = self.custom_data.get("MathameticalTransformationParam", None)
+            if mat_transform_list:
+                # Checking whether the columns are present in the dataset
+                _Validators._validate_dataframe_has_argument_columns(list(mat_transform_list.keys()),
+                                                                     "MathameticalTransformationParam", self.data, "df")
+
+                # Lists for storing target columns and mathematical transformation information
+                transform_data = []
+                target_columns = []
+                # Storing custom mathematical transformation indicator in data transform dictionary
+                self.data_transform_dict['custom_mathematical_transformation_ind'] = True
+                # Storing custom numapply transformation parameters in data transform dictionary
+                self.data_transform_dict['custom_numapply_transformation_param'] = {}
+
+                for col, transform_val in mat_transform_list.items():
+                    apply_method = transform_val["apply_method"]
+                    if apply_method in ["sininv", "sigmoid"]:
+                        # Applying numapply transformation
+                        self.data = self._numapply_transformation(col, transform_val)
+                        self._display_msg(msg="Updated dataset sample after applying numapply transformation:",
+                                          data=self.data,
+                                          progress_bar=self.progress_bar)
+                        # Updating parameter details for each column
+                        self.data_transform_dict['custom_numapply_transformation_param'].update({col : transform_val})
+                    else:
+                        # Handling specific scenarios for log and pow transformations
+                        parameters = ""
+                        if apply_method == "log":
+                            base = transform_val["base"]
+                            parameters = json.dumps({"base" : base})
+                        elif apply_method == "pow":
+                            exponent = transform_val["exponent"]
+                            parameters = json.dumps({"exponent" : exponent})
+                        target_columns.append(col)
+                        transform_data.append({"TargetColumn" : col, "DefaultValue" : 1, "Transformation" : apply_method, "Parameters" : parameters})
+                # Checking for transformation data
+                if len(transform_data):
+                    # Converting into pandas and then teradataml DataFrame for performing further operations
+                    transform_data = pd.DataFrame(transform_data, columns=["TargetColumn", "DefaultValue", "Transformation", "Parameters"])
+                    self._display_msg(msg="Numerical transformation information:",
+                                      data=transform_data,
+                                      progress_bar=self.progress_bar)
+                    copy_to_sql(df=transform_data, table_name="automl_num_transform_data", temporary=True)
+                    num_transform_data = DataFrame.from_table("automl_num_transform_data")
+                    # Applying transformation using Fit/Transform functions
+                    self._numerical_transformation(target_columns, num_transform_data)
+                    # Storing custom numerical transformation parameters and column list in data transform dictionary
+                    self.data_transform_dict['custom_numerical_transformation_col'] = target_columns
+                    self.data_transform_dict['custom_numerical_transformation_params'] = num_transform_data
+            else:
+                self._display_msg(inline_msg="No input provided for performing customized mathematical transformation.",
+                                  progress_bar=self.progress_bar)
+        else:
+            self._display_msg(inline_msg="Skipping customized mathematical transformation.",
+                              progress_bar=self.progress_bar)
+
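A hedged sketch of a MathameticalTransformationParam value (the key's spelling follows the source; the column names are invented). log/pow entries flow through the Fit/Transform path, while sininv/sigmoid entries go through NumApply:

    mathematical_transformation_param = {
        "age":    {"apply_method": "log", "base": 10},
        "income": {"apply_method": "pow", "exponent": 2},
        "score":  {"apply_method": "sigmoid", "sigmoid_style": "logit"}  # style value assumed
    }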
+    def _non_linear_transformation(self):
+        """
+        DESCRIPTION:
+            Function to perform customized non-linear transformation on numerical features based on user input.
+
+        """
+        self._display_msg(msg="\nStarting customized non-linear transformation ...",
+                          progress_bar=self.progress_bar,
+                          show_data=True)
+        nl_transform_input = self.custom_data.get("NonLinearTransformationIndicator", False)
+        # Checking user input for non-linear transformation
+        if nl_transform_input:
+            # Extracting list required for non-linear transformation
+            nl_transform_list = self.custom_data.get("NonLinearTransformationParam", None)
+            if nl_transform_list:
+                total_combination = len(nl_transform_list)
+                # Generating all possible combination names
+                possible_combination = ["Combination_" + str(counter) for counter in range(1, total_combination + 1)]
+                self._display_msg(msg="Possible combinations:",
+                                  col_lst=possible_combination,
+                                  progress_bar=self.progress_bar)
+                # Storing custom non-linear transformation indicator in data transform dictionary
+                self.data_transform_dict['custom_non_linear_transformation_ind'] = True
+                # Storing custom non-linear transformation fit objects in data transform dictionary
+                self.data_transform_dict['custom_non_linear_transformation_fit_object'] = {}
+                # Performing transformation for each combination
+                for comb, transform_val in nl_transform_list.items():
+                    if comb in possible_combination:
+                        target_columns = transform_val["target_columns"]
+                        # Checking whether the columns are present in the dataset
+                        _Validators._validate_dataframe_has_argument_columns(target_columns,
+                                                                             "target_columns", self.data, "df")
+
+                        formula = transform_val["formula"]
+                        result_column = transform_val["result_column"]
+                        # Adding fit params for transformation
+                        fit_param = {
+                            "data" : self.data,
+                            "target_columns" : target_columns,
+                            "formula" : formula,
+                            "result_column" : result_column
+                        }
+                        # Performing fit on dataset
+                        fit_obj = NonLinearCombineFit(**fit_param)
+                        # Updating it for each non-linear combination
+                        self.data_transform_dict['custom_non_linear_transformation_fit_object'].update({comb : fit_obj})
+                        # Adding transform params for transformation
+                        transform_params = {
+                            "data" : self.data,
+                            "object" : fit_obj,
+                            "accumulate" : self.data.columns,
+                            "persist" : True
+                        }
+                        self.data = NonLinearCombineTransform(**transform_params).result
+                    else:
+                        self._display_msg(inline_msg="Combinations are not as per expectation.",
+                                          progress_bar=self.progress_bar)
+                self._display_msg(msg="Updated dataset sample after performing non-linear transformation:",
+                                  data=self.data,
+                                  progress_bar=self.progress_bar)
+            else:
+                self._display_msg(inline_msg="No information provided for performing customized non-linear transformation.",
+                                  progress_bar=self.progress_bar)
+        else:
+            self._display_msg(inline_msg="Skipping customized non-linear transformation.",
+                              progress_bar=self.progress_bar)
+
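A standalone sketch of one combination, assuming a connected session; the columns, the X0/X1-style formula, and the result column name are illustrative:

    from teradataml import DataFrame, NonLinearCombineFit, NonLinearCombineTransform

    df = DataFrame("my_input_table")            # hypothetical table
    fit_obj = NonLinearCombineFit(data=df,
                                  target_columns=["weight", "height"],  # hypothetical columns
                                  formula="Y = X0/(X1*X1)",             # BMI-style combination
                                  result_column="bmi")
    df = NonLinearCombineTransform(data=df,
                                   object=fit_obj,
                                   accumulate=df.columns).result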
+    def _anti_select_columns(self):
+        """
+        DESCRIPTION:
+            Function to remove specific features from the dataset based on user input.
+
+        """
+        self._display_msg(msg="\nStarting customized anti-select columns ...",
+                          progress_bar=self.progress_bar,
+                          show_data=True)
+        anti_select_input = self.custom_data.get("AntiselectIndicator", False)
+        # Checking user input for anti-select columns
+        if anti_select_input:
+            # Extracting list required for anti-select columns
+            anti_select_list = self.custom_data.get("AntiselectParam", None)
+            if anti_select_list:
+                if all(item in self.data.columns for item in anti_select_list):
+                    # Storing custom anti-select columns indicator and column list in data transform dictionary
+                    self.data_transform_dict['custom_anti_select_columns_ind'] = True
+                    self.data_transform_dict['custom_anti_select_columns'] = anti_select_list
+                    fit_params = {
+                        "data" : self.data,
+                        "exclude" : anti_select_list
+                    }
+                    # Performing transformation for given user input
+                    self.data = Antiselect(**fit_params).result
+                    self._display_msg(msg="Updated dataset sample after performing anti-select columns:",
+                                      data=self.data,
+                                      progress_bar=self.progress_bar)
+                else:
+                    self._display_msg(msg="Columns provided in list are not present in dataset:",
+                                      col_lst=anti_select_list,
+                                      progress_bar=self.progress_bar)
+            else:
+                self._display_msg(inline_msg="No information provided for performing anti-select columns operation.",
+                                  progress_bar=self.progress_bar)
+        else:
+            self._display_msg(inline_msg="Skipping customized anti-select columns.",
+                              progress_bar=self.progress_bar)
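And the standalone equivalent of the Antiselect call, as a sketch with hypothetical table and column names:

    from teradataml import Antiselect, DataFrame

    df = DataFrame("my_input_table")
    df = Antiselect(data=df, exclude=["row_id", "notes"]).result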