teradataml-17.20.0.7-py3-none-any.whl → teradataml-20.0.0.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
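
For anyone reproducing this comparison locally, here is a minimal sketch, not part of the diff itself. It assumes pip access to a registry serving these wheels and that teradataml exposes a `__version__` attribute, backed by the `teradataml/_version.py` module listed in the file table below.

    # Hypothetical sketch: install a pinned release, e.g.
    #     pip install "teradataml==17.20.0.7"   # old release
    #     pip install "teradataml==20.0.0.1"    # new release
    # then confirm which version is active in the current environment.
    import teradataml

    # _version.py (item 5 in the listing) backs this attribute; the printed
    # string is expected to match the version in the installed wheel's name.
    print(teradataml.__version__)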

Potentially problematic release.



Files changed (1303)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1935 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2040 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +798 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1683 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +1011 -0
  31. teradataml/automl/data_transformation.py +789 -0
  32. teradataml/automl/feature_engineering.py +1580 -0
  33. teradataml/automl/feature_exploration.py +554 -0
  34. teradataml/automl/model_evaluation.py +151 -0
  35. teradataml/automl/model_training.py +1026 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/auth_client.py +133 -0
  41. teradataml/clients/pkce_client.py +481 -481
  42. teradataml/common/aed_utils.py +7 -2
  43. teradataml/common/bulk_exposed_utils.py +111 -111
  44. teradataml/common/constants.py +1438 -1441
  45. teradataml/common/deprecations.py +160 -0
  46. teradataml/common/exceptions.py +73 -73
  47. teradataml/common/formula.py +742 -742
  48. teradataml/common/garbagecollector.py +597 -635
  49. teradataml/common/messagecodes.py +424 -431
  50. teradataml/common/messages.py +228 -231
  51. teradataml/common/sqlbundle.py +693 -693
  52. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  53. teradataml/common/utils.py +2424 -2500
  54. teradataml/common/warnings.py +25 -25
  55. teradataml/common/wrapper_utils.py +1 -110
  56. teradataml/config/dummy_file1.cfg +4 -4
  57. teradataml/config/dummy_file2.cfg +2 -2
  58. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  59. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  60. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  61. teradataml/context/aed_context.py +217 -217
  62. teradataml/context/context.py +1091 -999
  63. teradataml/data/A_loan.csv +19 -19
  64. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  65. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  66. teradataml/data/B_loan.csv +49 -49
  67. teradataml/data/BuoyData2.csv +17 -17
  68. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  69. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  70. teradataml/data/Convolve2RealsLeft.csv +5 -5
  71. teradataml/data/Convolve2RealsRight.csv +5 -5
  72. teradataml/data/Convolve2ValidLeft.csv +11 -11
  73. teradataml/data/Convolve2ValidRight.csv +11 -11
  74. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  75. teradataml/data/Orders1_12mf.csv +24 -24
  76. teradataml/data/Pi_loan.csv +7 -7
  77. teradataml/data/SMOOTHED_DATA.csv +7 -7
  78. teradataml/data/TestDFFT8.csv +9 -9
  79. teradataml/data/TestRiver.csv +109 -109
  80. teradataml/data/Traindata.csv +28 -28
  81. teradataml/data/acf.csv +17 -17
  82. teradataml/data/adaboost_example.json +34 -34
  83. teradataml/data/adaboostpredict_example.json +24 -24
  84. teradataml/data/additional_table.csv +10 -10
  85. teradataml/data/admissions_test.csv +21 -21
  86. teradataml/data/admissions_train.csv +41 -41
  87. teradataml/data/admissions_train_nulls.csv +41 -41
  88. teradataml/data/advertising.csv +201 -0
  89. teradataml/data/ageandheight.csv +13 -13
  90. teradataml/data/ageandpressure.csv +31 -31
  91. teradataml/data/antiselect_example.json +36 -36
  92. teradataml/data/antiselect_input.csv +8 -8
  93. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  94. teradataml/data/applicant_external.csv +6 -6
  95. teradataml/data/applicant_reference.csv +6 -6
  96. teradataml/data/arima_example.json +9 -9
  97. teradataml/data/assortedtext_input.csv +8 -8
  98. teradataml/data/attribution_example.json +33 -33
  99. teradataml/data/attribution_sample_table.csv +27 -27
  100. teradataml/data/attribution_sample_table1.csv +6 -6
  101. teradataml/data/attribution_sample_table2.csv +11 -11
  102. teradataml/data/bank_churn.csv +10001 -0
  103. teradataml/data/bank_marketing.csv +11163 -0
  104. teradataml/data/bank_web_clicks1.csv +42 -42
  105. teradataml/data/bank_web_clicks2.csv +91 -91
  106. teradataml/data/bank_web_url.csv +85 -85
  107. teradataml/data/barrier.csv +2 -2
  108. teradataml/data/barrier_new.csv +3 -3
  109. teradataml/data/betweenness_example.json +13 -13
  110. teradataml/data/bike_sharing.csv +732 -0
  111. teradataml/data/bin_breaks.csv +8 -8
  112. teradataml/data/bin_fit_ip.csv +3 -3
  113. teradataml/data/binary_complex_left.csv +11 -11
  114. teradataml/data/binary_complex_right.csv +11 -11
  115. teradataml/data/binary_matrix_complex_left.csv +21 -21
  116. teradataml/data/binary_matrix_complex_right.csv +21 -21
  117. teradataml/data/binary_matrix_real_left.csv +21 -21
  118. teradataml/data/binary_matrix_real_right.csv +21 -21
  119. teradataml/data/blood2ageandweight.csv +26 -26
  120. teradataml/data/bmi.csv +501 -0
  121. teradataml/data/boston.csv +507 -507
  122. teradataml/data/boston2cols.csv +721 -0
  123. teradataml/data/breast_cancer.csv +570 -0
  124. teradataml/data/buoydata_mix.csv +11 -11
  125. teradataml/data/burst_data.csv +5 -5
  126. teradataml/data/burst_example.json +20 -20
  127. teradataml/data/byom_example.json +17 -17
  128. teradataml/data/bytes_table.csv +3 -3
  129. teradataml/data/cal_housing_ex_raw.csv +70 -70
  130. teradataml/data/callers.csv +7 -7
  131. teradataml/data/calls.csv +10 -10
  132. teradataml/data/cars_hist.csv +33 -33
  133. teradataml/data/cat_table.csv +24 -24
  134. teradataml/data/ccm_example.json +31 -31
  135. teradataml/data/ccm_input.csv +91 -91
  136. teradataml/data/ccm_input2.csv +13 -13
  137. teradataml/data/ccmexample.csv +101 -101
  138. teradataml/data/ccmprepare_example.json +8 -8
  139. teradataml/data/ccmprepare_input.csv +91 -91
  140. teradataml/data/cfilter_example.json +12 -12
  141. teradataml/data/changepointdetection_example.json +18 -18
  142. teradataml/data/changepointdetectionrt_example.json +8 -8
  143. teradataml/data/chi_sq.csv +2 -2
  144. teradataml/data/churn_data.csv +14 -14
  145. teradataml/data/churn_emission.csv +35 -35
  146. teradataml/data/churn_initial.csv +3 -3
  147. teradataml/data/churn_state_transition.csv +5 -5
  148. teradataml/data/citedges_2.csv +745 -745
  149. teradataml/data/citvertices_2.csv +1210 -1210
  150. teradataml/data/clicks2.csv +16 -16
  151. teradataml/data/clickstream.csv +12 -12
  152. teradataml/data/clickstream1.csv +11 -11
  153. teradataml/data/closeness_example.json +15 -15
  154. teradataml/data/complaints.csv +21 -21
  155. teradataml/data/complaints_mini.csv +3 -3
  156. teradataml/data/complaints_testtoken.csv +224 -224
  157. teradataml/data/complaints_tokens_test.csv +353 -353
  158. teradataml/data/complaints_traintoken.csv +472 -472
  159. teradataml/data/computers_category.csv +1001 -1001
  160. teradataml/data/computers_test1.csv +1252 -1252
  161. teradataml/data/computers_train1.csv +5009 -5009
  162. teradataml/data/computers_train1_clustered.csv +5009 -5009
  163. teradataml/data/confusionmatrix_example.json +9 -9
  164. teradataml/data/conversion_event_table.csv +3 -3
  165. teradataml/data/corr_input.csv +17 -17
  166. teradataml/data/correlation_example.json +11 -11
  167. teradataml/data/coxhazardratio_example.json +39 -39
  168. teradataml/data/coxph_example.json +15 -15
  169. teradataml/data/coxsurvival_example.json +28 -28
  170. teradataml/data/cpt.csv +41 -41
  171. teradataml/data/credit_ex_merged.csv +45 -45
  172. teradataml/data/customer_loyalty.csv +301 -301
  173. teradataml/data/customer_loyalty_newseq.csv +31 -31
  174. teradataml/data/customer_segmentation_test.csv +2628 -0
  175. teradataml/data/customer_segmentation_train.csv +8069 -0
  176. teradataml/data/dataframe_example.json +146 -146
  177. teradataml/data/decisionforest_example.json +37 -37
  178. teradataml/data/decisionforestpredict_example.json +38 -38
  179. teradataml/data/decisiontree_example.json +21 -21
  180. teradataml/data/decisiontreepredict_example.json +45 -45
  181. teradataml/data/dfft2_size4_real.csv +17 -17
  182. teradataml/data/dfft2_test_matrix16.csv +17 -17
  183. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  184. teradataml/data/diabetes.csv +443 -443
  185. teradataml/data/diabetes_test.csv +89 -89
  186. teradataml/data/dict_table.csv +5 -5
  187. teradataml/data/docperterm_table.csv +4 -4
  188. teradataml/data/docs/__init__.py +1 -1
  189. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  190. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  191. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  192. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  193. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  194. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  195. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  196. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  197. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  198. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  199. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  200. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  201. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  202. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  203. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  204. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  205. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  206. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  207. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  208. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  209. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  210. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  211. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  212. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  213. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  214. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  215. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  216. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  217. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
  218. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
  219. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  220. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
  221. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  222. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  223. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  224. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  225. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  226. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  227. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  228. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  229. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  230. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  231. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  232. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  233. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  234. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  235. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  236. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  237. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  238. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  239. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  240. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  241. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  242. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
  243. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  244. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  245. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  246. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  247. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  248. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  249. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  250. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  251. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
  252. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  253. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  254. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  255. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  256. teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
  257. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  258. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  259. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
  260. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  261. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  262. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  263. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
  264. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  265. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  266. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  267. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
  268. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  269. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  270. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  271. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  272. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  273. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  274. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  275. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  276. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
  277. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  278. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  279. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  280. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  281. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
  282. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
  283. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  284. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  285. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  286. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  287. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  288. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  289. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  290. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  291. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
  292. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  293. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  294. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  295. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  296. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  297. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  298. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  299. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  300. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
  301. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  302. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
  303. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
  304. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  305. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  306. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  307. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  308. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  309. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  310. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  311. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  312. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
  313. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  314. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  315. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  316. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  317. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  318. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  319. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  320. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  321. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  322. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  323. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  324. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
  325. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
  326. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
  327. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  328. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  329. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  330. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  331. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  332. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  333. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  334. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  335. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  336. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  337. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  338. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  339. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  340. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  341. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  342. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  343. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  344. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  345. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  346. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  347. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  348. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  349. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  350. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  351. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  352. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  353. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  354. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  355. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  356. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  357. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  358. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  359. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  360. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  361. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  362. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  363. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  364. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  365. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  366. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  367. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  368. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  369. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  370. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  371. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  372. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  373. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  374. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  375. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  376. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  377. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  378. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  379. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  380. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  381. teradataml/data/dtw_example.json +17 -17
  382. teradataml/data/dtw_t1.csv +11 -11
  383. teradataml/data/dtw_t2.csv +4 -4
  384. teradataml/data/dwt2d_example.json +15 -15
  385. teradataml/data/dwt_example.json +14 -14
  386. teradataml/data/dwt_filter_dim.csv +5 -5
  387. teradataml/data/emission.csv +9 -9
  388. teradataml/data/emp_table_by_dept.csv +19 -19
  389. teradataml/data/employee_info.csv +4 -4
  390. teradataml/data/employee_table.csv +6 -6
  391. teradataml/data/excluding_event_table.csv +2 -2
  392. teradataml/data/finance_data.csv +6 -6
  393. teradataml/data/finance_data2.csv +61 -61
  394. teradataml/data/finance_data3.csv +93 -93
  395. teradataml/data/fish.csv +160 -0
  396. teradataml/data/fm_blood2ageandweight.csv +26 -26
  397. teradataml/data/fmeasure_example.json +11 -11
  398. teradataml/data/followers_leaders.csv +10 -10
  399. teradataml/data/fpgrowth_example.json +12 -12
  400. teradataml/data/frequentpaths_example.json +29 -29
  401. teradataml/data/friends.csv +9 -9
  402. teradataml/data/fs_input.csv +33 -33
  403. teradataml/data/fs_input1.csv +33 -33
  404. teradataml/data/genData.csv +513 -513
  405. teradataml/data/geodataframe_example.json +39 -39
  406. teradataml/data/glass_types.csv +215 -0
  407. teradataml/data/glm_admissions_model.csv +12 -12
  408. teradataml/data/glm_example.json +56 -29
  409. teradataml/data/glml1l2_example.json +28 -28
  410. teradataml/data/glml1l2predict_example.json +54 -54
  411. teradataml/data/glmpredict_example.json +54 -54
  412. teradataml/data/gq_t1.csv +21 -21
  413. teradataml/data/hconvolve_complex_right.csv +5 -5
  414. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  415. teradataml/data/histogram_example.json +11 -11
  416. teradataml/data/hmmdecoder_example.json +78 -78
  417. teradataml/data/hmmevaluator_example.json +24 -24
  418. teradataml/data/hmmsupervised_example.json +10 -10
  419. teradataml/data/hmmunsupervised_example.json +7 -7
  420. teradataml/data/house_values.csv +12 -12
  421. teradataml/data/house_values2.csv +13 -13
  422. teradataml/data/housing_cat.csv +7 -7
  423. teradataml/data/housing_data.csv +9 -9
  424. teradataml/data/housing_test.csv +47 -47
  425. teradataml/data/housing_test_binary.csv +47 -47
  426. teradataml/data/housing_train.csv +493 -493
  427. teradataml/data/housing_train_attribute.csv +4 -4
  428. teradataml/data/housing_train_binary.csv +437 -437
  429. teradataml/data/housing_train_parameter.csv +2 -2
  430. teradataml/data/housing_train_response.csv +493 -493
  431. teradataml/data/housing_train_segment.csv +201 -0
  432. teradataml/data/ibm_stock.csv +370 -370
  433. teradataml/data/ibm_stock1.csv +370 -370
  434. teradataml/data/identitymatch_example.json +21 -21
  435. teradataml/data/idf_table.csv +4 -4
  436. teradataml/data/impressions.csv +101 -101
  437. teradataml/data/inflation.csv +21 -21
  438. teradataml/data/initial.csv +3 -3
  439. teradataml/data/insect2Cols.csv +61 -0
  440. teradataml/data/insect_sprays.csv +12 -12
  441. teradataml/data/insurance.csv +1339 -1339
  442. teradataml/data/interpolator_example.json +12 -12
  443. teradataml/data/iris_altinput.csv +481 -481
  444. teradataml/data/iris_attribute_output.csv +8 -8
  445. teradataml/data/iris_attribute_test.csv +121 -121
  446. teradataml/data/iris_attribute_train.csv +481 -481
  447. teradataml/data/iris_category_expect_predict.csv +31 -31
  448. teradataml/data/iris_data.csv +151 -0
  449. teradataml/data/iris_input.csv +151 -151
  450. teradataml/data/iris_response_train.csv +121 -121
  451. teradataml/data/iris_test.csv +31 -31
  452. teradataml/data/iris_train.csv +121 -121
  453. teradataml/data/join_table1.csv +4 -4
  454. teradataml/data/join_table2.csv +4 -4
  455. teradataml/data/jsons/anly_function_name.json +6 -6
  456. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  457. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  458. teradataml/data/jsons/byom/h2opredict.json +194 -194
  459. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  460. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  461. teradataml/data/jsons/paired_functions.json +435 -435
  462. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  463. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  464. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  465. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  466. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  467. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  468. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  469. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  470. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  471. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  472. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  473. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  474. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  475. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  476. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  477. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  478. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  479. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  480. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  481. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  482. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  483. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  484. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  485. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  486. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  487. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  488. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  489. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  490. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  491. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  492. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  493. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  494. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  495. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  496. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  497. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  498. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  499. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  500. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  501. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  502. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  503. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  504. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  505. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  506. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  507. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  508. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  509. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  510. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  511. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  512. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  513. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  514. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  515. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  516. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  517. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  518. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  519. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  520. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  521. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  522. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  523. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  524. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  525. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  526. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  527. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  528. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  529. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  531. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  532. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  533. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  534. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  535. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  536. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  537. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  539. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  540. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  541. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  542. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  543. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  544. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  545. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  546. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  547. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  548. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  549. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  550. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  551. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  552. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  553. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  554. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  555. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  556. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  557. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  558. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  559. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  560. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  561. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  562. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  563. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  564. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  565. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  566. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  567. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  568. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
  569. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  570. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  571. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  572. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  573. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  574. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  575. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  576. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  577. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  578. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  579. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
  580. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  581. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  582. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  583. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
  584. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
  585. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  586. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  587. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
  588. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  589. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  590. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  591. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
  592. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  593. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  594. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  595. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
  596. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  597. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  598. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  599. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  600. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  601. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  602. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  603. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  604. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  605. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  606. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  607. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  608. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  609. teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
  610. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  611. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  612. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  613. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  614. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  615. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  616. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  617. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  618. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
  619. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
  620. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
  621. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  622. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  623. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  624. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  625. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  626. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  627. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  628. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  629. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  630. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  631. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  632. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  633. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  634. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  635. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
  636. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
  637. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
  638. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  639. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  640. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  641. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  642. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  643. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  644. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  645. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  646. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  647. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  648. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  649. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  650. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  651. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  653. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  654. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  655. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  656. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  657. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  658. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  659. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  660. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  661. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  662. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  663. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  664. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  665. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  666. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  667. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  668. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  669. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  670. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  671. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  672. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  673. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  674. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  675. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  676. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  677. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  678. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  679. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  680. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  681. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  682. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  683. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  684. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  685. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  686. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  687. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  688. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  689. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  690. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  691. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  692. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  693. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  694. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  695. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  696. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  697. teradataml/data/kmeans_example.json +22 -17
  698. teradataml/data/kmeans_table.csv +10 -0
  699. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  700. teradataml/data/knn_example.json +18 -18
  701. teradataml/data/knnrecommender_example.json +6 -6
  702. teradataml/data/knnrecommenderpredict_example.json +12 -12
  703. teradataml/data/lar_example.json +17 -17
  704. teradataml/data/larpredict_example.json +30 -30
  705. teradataml/data/lc_new_predictors.csv +5 -5
  706. teradataml/data/lc_new_reference.csv +9 -9
  707. teradataml/data/lda_example.json +8 -8
  708. teradataml/data/ldainference_example.json +14 -14
  709. teradataml/data/ldatopicsummary_example.json +8 -8
  710. teradataml/data/levendist_input.csv +13 -13
  711. teradataml/data/levenshteindistance_example.json +10 -10
  712. teradataml/data/linreg_example.json +9 -9
  713. teradataml/data/load_example_data.py +326 -323
  714. teradataml/data/loan_prediction.csv +295 -295
  715. teradataml/data/lungcancer.csv +138 -138
  716. teradataml/data/mappingdata.csv +12 -12
  717. teradataml/data/milk_timeseries.csv +157 -157
  718. teradataml/data/min_max_titanic.csv +4 -4
  719. teradataml/data/minhash_example.json +6 -6
  720. teradataml/data/ml_ratings.csv +7547 -7547
  721. teradataml/data/ml_ratings_10.csv +2445 -2445
  722. teradataml/data/model1_table.csv +5 -5
  723. teradataml/data/model2_table.csv +5 -5
  724. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  725. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  726. teradataml/data/modularity_example.json +12 -12
  727. teradataml/data/movavg_example.json +7 -7
  728. teradataml/data/mtx1.csv +7 -7
  729. teradataml/data/mtx2.csv +13 -13
  730. teradataml/data/multi_model_classification.csv +401 -0
  731. teradataml/data/multi_model_regression.csv +401 -0
  732. teradataml/data/mvdfft8.csv +9 -9
  733. teradataml/data/naivebayes_example.json +9 -9
  734. teradataml/data/naivebayespredict_example.json +19 -19
  735. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  736. teradataml/data/naivebayestextclassifier_example.json +8 -8
  737. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  738. teradataml/data/name_Find_configure.csv +10 -10
  739. teradataml/data/namedentityfinder_example.json +14 -14
  740. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  741. teradataml/data/namedentityfindertrainer_example.json +6 -6
  742. teradataml/data/nb_iris_input_test.csv +31 -31
  743. teradataml/data/nb_iris_input_train.csv +121 -121
  744. teradataml/data/nbp_iris_model.csv +13 -13
  745. teradataml/data/ner_extractor_text.csv +2 -2
  746. teradataml/data/ner_sports_test2.csv +29 -29
  747. teradataml/data/ner_sports_train.csv +501 -501
  748. teradataml/data/nerevaluator_example.json +5 -5
  749. teradataml/data/nerextractor_example.json +18 -18
  750. teradataml/data/nermem_sports_test.csv +17 -17
  751. teradataml/data/nermem_sports_train.csv +50 -50
  752. teradataml/data/nertrainer_example.json +6 -6
  753. teradataml/data/ngrams_example.json +6 -6
  754. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  755. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  756. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  757. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  758. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  759. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  760. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  761. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  762. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  763. teradataml/data/npath_example.json +23 -23
  764. teradataml/data/ntree_example.json +14 -14
  765. teradataml/data/numeric_strings.csv +4 -4
  766. teradataml/data/numerics.csv +4 -4
  767. teradataml/data/ocean_buoy.csv +17 -17
  768. teradataml/data/ocean_buoy2.csv +17 -17
  769. teradataml/data/ocean_buoys.csv +27 -27
  770. teradataml/data/ocean_buoys2.csv +10 -10
  771. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  772. teradataml/data/ocean_buoys_seq.csv +29 -29
  773. teradataml/data/onehot_encoder_train.csv +4 -0
  774. teradataml/data/openml_example.json +92 -0
  775. teradataml/data/optional_event_table.csv +4 -4
  776. teradataml/data/orders1.csv +11 -11
  777. teradataml/data/orders1_12.csv +12 -12
  778. teradataml/data/orders_ex.csv +4 -4
  779. teradataml/data/pack_example.json +8 -8
  780. teradataml/data/package_tracking.csv +19 -19
  781. teradataml/data/package_tracking_pti.csv +18 -18
  782. teradataml/data/pagerank_example.json +13 -13
  783. teradataml/data/paragraphs_input.csv +6 -6
  784. teradataml/data/pathanalyzer_example.json +7 -7
  785. teradataml/data/pathgenerator_example.json +7 -7
  786. teradataml/data/phrases.csv +7 -7
  787. teradataml/data/pivot_example.json +8 -8
  788. teradataml/data/pivot_input.csv +22 -22
  789. teradataml/data/playerRating.csv +31 -31
  790. teradataml/data/postagger_example.json +6 -6
  791. teradataml/data/posttagger_output.csv +44 -44
  792. teradataml/data/production_data.csv +16 -16
  793. teradataml/data/production_data2.csv +7 -7
  794. teradataml/data/randomsample_example.json +31 -31
  795. teradataml/data/randomwalksample_example.json +8 -8
  796. teradataml/data/rank_table.csv +6 -6
  797. teradataml/data/ref_mobile_data.csv +4 -4
  798. teradataml/data/ref_mobile_data_dense.csv +2 -2
  799. teradataml/data/ref_url.csv +17 -17
  800. teradataml/data/restaurant_reviews.csv +7 -7
  801. teradataml/data/river_data.csv +145 -145
  802. teradataml/data/roc_example.json +7 -7
  803. teradataml/data/roc_input.csv +101 -101
  804. teradataml/data/rule_inputs.csv +6 -6
  805. teradataml/data/rule_table.csv +2 -2
  806. teradataml/data/sales.csv +7 -7
  807. teradataml/data/sales_transaction.csv +501 -501
  808. teradataml/data/salesdata.csv +342 -342
  809. teradataml/data/sample_cities.csv +2 -2
  810. teradataml/data/sample_shapes.csv +10 -10
  811. teradataml/data/sample_streets.csv +2 -2
  812. teradataml/data/sampling_example.json +15 -15
  813. teradataml/data/sax_example.json +8 -8
  814. teradataml/data/scale_attributes.csv +3 -0
  815. teradataml/data/scale_example.json +74 -23
  816. teradataml/data/scale_housing.csv +11 -11
  817. teradataml/data/scale_housing_test.csv +6 -6
  818. teradataml/data/scale_input_part_sparse.csv +31 -0
  819. teradataml/data/scale_input_partitioned.csv +16 -0
  820. teradataml/data/scale_input_sparse.csv +11 -0
  821. teradataml/data/scale_parameters.csv +3 -0
  822. teradataml/data/scale_stat.csv +11 -11
  823. teradataml/data/scalebypartition_example.json +13 -13
  824. teradataml/data/scalemap_example.json +13 -13
  825. teradataml/data/scalesummary_example.json +12 -12
  826. teradataml/data/score_category.csv +101 -101
  827. teradataml/data/score_summary.csv +4 -4
  828. teradataml/data/script_example.json +9 -9
  829. teradataml/data/scripts/deploy_script.py +84 -0
  830. teradataml/data/scripts/mapper.R +20 -0
  831. teradataml/data/scripts/mapper.py +15 -15
  832. teradataml/data/scripts/mapper_replace.py +15 -15
  833. teradataml/data/scripts/sklearn/__init__.py +0 -0
  834. teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
  835. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
  836. teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
  837. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
  838. teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
  839. teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
  840. teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
  841. teradataml/data/seeds.csv +10 -10
  842. teradataml/data/sentenceextractor_example.json +6 -6
  843. teradataml/data/sentiment_extract_input.csv +11 -11
  844. teradataml/data/sentiment_train.csv +16 -16
  845. teradataml/data/sentiment_word.csv +20 -20
  846. teradataml/data/sentiment_word_input.csv +19 -19
  847. teradataml/data/sentimentextractor_example.json +24 -24
  848. teradataml/data/sentimenttrainer_example.json +8 -8
  849. teradataml/data/sequence_table.csv +10 -10
  850. teradataml/data/seriessplitter_example.json +7 -7
  851. teradataml/data/sessionize_example.json +17 -17
  852. teradataml/data/sessionize_table.csv +116 -116
  853. teradataml/data/setop_test1.csv +24 -24
  854. teradataml/data/setop_test2.csv +22 -22
  855. teradataml/data/soc_nw_edges.csv +10 -10
  856. teradataml/data/soc_nw_vertices.csv +7 -7
  857. teradataml/data/souvenir_timeseries.csv +167 -167
  858. teradataml/data/sparse_iris_attribute.csv +5 -5
  859. teradataml/data/sparse_iris_test.csv +121 -121
  860. teradataml/data/sparse_iris_train.csv +601 -601
  861. teradataml/data/star1.csv +6 -6
  862. teradataml/data/state_transition.csv +5 -5
  863. teradataml/data/stock_data.csv +53 -53
  864. teradataml/data/stock_movement.csv +11 -11
  865. teradataml/data/stock_vol.csv +76 -76
  866. teradataml/data/stop_words.csv +8 -8
  867. teradataml/data/store_sales.csv +37 -37
  868. teradataml/data/stringsimilarity_example.json +7 -7
  869. teradataml/data/strsimilarity_input.csv +13 -13
  870. teradataml/data/students.csv +101 -101
  871. teradataml/data/svm_iris_input_test.csv +121 -121
  872. teradataml/data/svm_iris_input_train.csv +481 -481
  873. teradataml/data/svm_iris_model.csv +7 -7
  874. teradataml/data/svmdense_example.json +9 -9
  875. teradataml/data/svmdensepredict_example.json +18 -18
  876. teradataml/data/svmsparse_example.json +7 -7
  877. teradataml/data/svmsparsepredict_example.json +13 -13
  878. teradataml/data/svmsparsesummary_example.json +7 -7
  879. teradataml/data/target_mobile_data.csv +13 -13
  880. teradataml/data/target_mobile_data_dense.csv +5 -5
  881. teradataml/data/templatedata.csv +1201 -1201
  882. teradataml/data/templates/open_source_ml.json +9 -0
  883. teradataml/data/teradataml_example.json +150 -1
  884. teradataml/data/test_classification.csv +101 -0
  885. teradataml/data/test_loan_prediction.csv +53 -53
  886. teradataml/data/test_pacf_12.csv +37 -37
  887. teradataml/data/test_prediction.csv +101 -0
  888. teradataml/data/test_regression.csv +101 -0
  889. teradataml/data/test_river2.csv +109 -109
  890. teradataml/data/text_inputs.csv +6 -6
  891. teradataml/data/textchunker_example.json +7 -7
  892. teradataml/data/textclassifier_example.json +6 -6
  893. teradataml/data/textclassifier_input.csv +7 -7
  894. teradataml/data/textclassifiertrainer_example.json +6 -6
  895. teradataml/data/textmorph_example.json +5 -5
  896. teradataml/data/textparser_example.json +15 -15
  897. teradataml/data/texttagger_example.json +11 -11
  898. teradataml/data/texttokenizer_example.json +6 -6
  899. teradataml/data/texttrainer_input.csv +11 -11
  900. teradataml/data/tf_example.json +6 -6
  901. teradataml/data/tfidf_example.json +13 -13
  902. teradataml/data/tfidf_input1.csv +201 -201
  903. teradataml/data/tfidf_train.csv +6 -6
  904. teradataml/data/time_table1.csv +535 -535
  905. teradataml/data/time_table2.csv +14 -14
  906. teradataml/data/timeseriesdata.csv +1601 -1601
  907. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  908. teradataml/data/titanic.csv +892 -892
  909. teradataml/data/token_table.csv +696 -696
  910. teradataml/data/train_multiclass.csv +101 -0
  911. teradataml/data/train_regression.csv +101 -0
  912. teradataml/data/train_regression_multiple_labels.csv +101 -0
  913. teradataml/data/train_tracking.csv +27 -27
  914. teradataml/data/transformation_table.csv +5 -5
  915. teradataml/data/transformation_table_new.csv +1 -1
  916. teradataml/data/tv_spots.csv +16 -16
  917. teradataml/data/twod_climate_data.csv +117 -117
  918. teradataml/data/uaf_example.json +475 -475
  919. teradataml/data/univariatestatistics_example.json +8 -8
  920. teradataml/data/unpack_example.json +9 -9
  921. teradataml/data/unpivot_example.json +9 -9
  922. teradataml/data/unpivot_input.csv +8 -8
  923. teradataml/data/us_air_pass.csv +36 -36
  924. teradataml/data/us_population.csv +624 -624
  925. teradataml/data/us_states_shapes.csv +52 -52
  926. teradataml/data/varmax_example.json +17 -17
  927. teradataml/data/vectordistance_example.json +25 -25
  928. teradataml/data/ville_climatedata.csv +121 -121
  929. teradataml/data/ville_tempdata.csv +12 -12
  930. teradataml/data/ville_tempdata1.csv +12 -12
  931. teradataml/data/ville_temperature.csv +11 -11
  932. teradataml/data/waveletTable.csv +1605 -1605
  933. teradataml/data/waveletTable2.csv +1605 -1605
  934. teradataml/data/weightedmovavg_example.json +8 -8
  935. teradataml/data/wft_testing.csv +5 -5
  936. teradataml/data/wine_data.csv +1600 -0
  937. teradataml/data/word_embed_input_table1.csv +5 -5
  938. teradataml/data/word_embed_input_table2.csv +4 -4
  939. teradataml/data/word_embed_model.csv +22 -22
  940. teradataml/data/words_input.csv +13 -13
  941. teradataml/data/xconvolve_complex_left.csv +6 -6
  942. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  943. teradataml/data/xgboost_example.json +35 -35
  944. teradataml/data/xgboostpredict_example.json +31 -31
  945. teradataml/data/ztest_example.json +16 -0
  946. teradataml/dataframe/copy_to.py +1769 -1698
  947. teradataml/dataframe/data_transfer.py +2812 -2745
  948. teradataml/dataframe/dataframe.py +17630 -16946
  949. teradataml/dataframe/dataframe_utils.py +1875 -1740
  950. teradataml/dataframe/fastload.py +794 -603
  951. teradataml/dataframe/indexer.py +424 -424
  952. teradataml/dataframe/setop.py +1179 -1166
  953. teradataml/dataframe/sql.py +10174 -6432
  954. teradataml/dataframe/sql_function_parameters.py +439 -388
  955. teradataml/dataframe/sql_functions.py +652 -652
  956. teradataml/dataframe/sql_interfaces.py +220 -220
  957. teradataml/dataframe/vantage_function_types.py +674 -630
  958. teradataml/dataframe/window.py +693 -692
  959. teradataml/dbutils/__init__.py +3 -3
  960. teradataml/dbutils/dbutils.py +1167 -1150
  961. teradataml/dbutils/filemgr.py +267 -267
  962. teradataml/gen_ai/__init__.py +2 -2
  963. teradataml/gen_ai/convAI.py +472 -472
  964. teradataml/geospatial/__init__.py +3 -3
  965. teradataml/geospatial/geodataframe.py +1105 -1094
  966. teradataml/geospatial/geodataframecolumn.py +392 -387
  967. teradataml/geospatial/geometry_types.py +925 -925
  968. teradataml/hyperparameter_tuner/__init__.py +1 -1
  969. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  970. teradataml/hyperparameter_tuner/utils.py +281 -187
  971. teradataml/lib/aed_0_1.dll +0 -0
  972. teradataml/lib/libaed_0_1.dylib +0 -0
  973. teradataml/lib/libaed_0_1.so +0 -0
  974. teradataml/libaed_0_1.dylib +0 -0
  975. teradataml/libaed_0_1.so +0 -0
  976. teradataml/opensource/__init__.py +1 -0
  977. teradataml/opensource/sklearn/__init__.py +1 -0
  978. teradataml/opensource/sklearn/_class.py +255 -0
  979. teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
  980. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  981. teradataml/opensource/sklearn/constants.py +54 -0
  982. teradataml/options/__init__.py +130 -124
  983. teradataml/options/configure.py +358 -336
  984. teradataml/options/display.py +176 -176
  985. teradataml/plot/__init__.py +2 -2
  986. teradataml/plot/axis.py +1388 -1388
  987. teradataml/plot/constants.py +15 -15
  988. teradataml/plot/figure.py +398 -398
  989. teradataml/plot/plot.py +760 -760
  990. teradataml/plot/query_generator.py +83 -83
  991. teradataml/plot/subplot.py +216 -216
  992. teradataml/scriptmgmt/UserEnv.py +3791 -3761
  993. teradataml/scriptmgmt/__init__.py +3 -3
  994. teradataml/scriptmgmt/lls_utils.py +1719 -1604
  995. teradataml/series/series.py +532 -532
  996. teradataml/series/series_utils.py +71 -71
  997. teradataml/table_operators/Apply.py +949 -917
  998. teradataml/table_operators/Script.py +1718 -1982
  999. teradataml/table_operators/TableOperator.py +1255 -1616
  1000. teradataml/table_operators/__init__.py +2 -3
  1001. teradataml/table_operators/apply_query_generator.py +262 -262
  1002. teradataml/table_operators/query_generator.py +507 -507
  1003. teradataml/table_operators/table_operator_query_generator.py +460 -460
  1004. teradataml/table_operators/table_operator_util.py +631 -639
  1005. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  1006. teradataml/table_operators/templates/dataframe_map.template +176 -176
  1007. teradataml/table_operators/templates/script_executor.template +170 -170
  1008. teradataml/utils/dtypes.py +684 -684
  1009. teradataml/utils/internal_buffer.py +84 -84
  1010. teradataml/utils/print_versions.py +205 -205
  1011. teradataml/utils/utils.py +410 -410
  1012. teradataml/utils/validators.py +2277 -2115
  1013. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
  1014. teradataml-20.0.0.1.dist-info/RECORD +1056 -0
  1015. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
  1016. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
  1017. teradataml/analytics/mle/AdaBoost.py +0 -651
  1018. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1019. teradataml/analytics/mle/Antiselect.py +0 -342
  1020. teradataml/analytics/mle/Arima.py +0 -641
  1021. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1022. teradataml/analytics/mle/Attribution.py +0 -1070
  1023. teradataml/analytics/mle/Betweenness.py +0 -658
  1024. teradataml/analytics/mle/Burst.py +0 -711
  1025. teradataml/analytics/mle/CCM.py +0 -600
  1026. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1027. teradataml/analytics/mle/CFilter.py +0 -460
  1028. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1029. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1030. teradataml/analytics/mle/Closeness.py +0 -737
  1031. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1032. teradataml/analytics/mle/Correlation.py +0 -477
  1033. teradataml/analytics/mle/Correlation2.py +0 -573
  1034. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1035. teradataml/analytics/mle/CoxPH.py +0 -556
  1036. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1037. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1038. teradataml/analytics/mle/DTW.py +0 -623
  1039. teradataml/analytics/mle/DWT.py +0 -564
  1040. teradataml/analytics/mle/DWT2D.py +0 -599
  1041. teradataml/analytics/mle/DecisionForest.py +0 -716
  1042. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1043. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1044. teradataml/analytics/mle/DecisionTree.py +0 -830
  1045. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1046. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1047. teradataml/analytics/mle/FMeasure.py +0 -402
  1048. teradataml/analytics/mle/FPGrowth.py +0 -734
  1049. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1050. teradataml/analytics/mle/GLM.py +0 -558
  1051. teradataml/analytics/mle/GLML1L2.py +0 -547
  1052. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1053. teradataml/analytics/mle/GLMPredict.py +0 -529
  1054. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1055. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1056. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1057. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1058. teradataml/analytics/mle/Histogram.py +0 -561
  1059. teradataml/analytics/mle/IDWT.py +0 -476
  1060. teradataml/analytics/mle/IDWT2D.py +0 -493
  1061. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1062. teradataml/analytics/mle/Interpolator.py +0 -918
  1063. teradataml/analytics/mle/KMeans.py +0 -485
  1064. teradataml/analytics/mle/KNN.py +0 -627
  1065. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1066. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1067. teradataml/analytics/mle/LAR.py +0 -439
  1068. teradataml/analytics/mle/LARPredict.py +0 -478
  1069. teradataml/analytics/mle/LDA.py +0 -548
  1070. teradataml/analytics/mle/LDAInference.py +0 -492
  1071. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1072. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1073. teradataml/analytics/mle/LinReg.py +0 -433
  1074. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1075. teradataml/analytics/mle/MinHash.py +0 -544
  1076. teradataml/analytics/mle/Modularity.py +0 -587
  1077. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1078. teradataml/analytics/mle/NERExtractor.py +0 -595
  1079. teradataml/analytics/mle/NERTrainer.py +0 -458
  1080. teradataml/analytics/mle/NGrams.py +0 -570
  1081. teradataml/analytics/mle/NPath.py +0 -634
  1082. teradataml/analytics/mle/NTree.py +0 -549
  1083. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1084. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1085. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1086. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1087. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1088. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1089. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1090. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1091. teradataml/analytics/mle/POSTagger.py +0 -417
  1092. teradataml/analytics/mle/Pack.py +0 -411
  1093. teradataml/analytics/mle/PageRank.py +0 -535
  1094. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1095. teradataml/analytics/mle/PathGenerator.py +0 -367
  1096. teradataml/analytics/mle/PathStart.py +0 -464
  1097. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1098. teradataml/analytics/mle/Pivot.py +0 -471
  1099. teradataml/analytics/mle/ROC.py +0 -425
  1100. teradataml/analytics/mle/RandomSample.py +0 -637
  1101. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1102. teradataml/analytics/mle/SAX.py +0 -779
  1103. teradataml/analytics/mle/SVMDense.py +0 -677
  1104. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1105. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1106. teradataml/analytics/mle/SVMSparse.py +0 -557
  1107. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1108. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1109. teradataml/analytics/mle/Sampling.py +0 -549
  1110. teradataml/analytics/mle/Scale.py +0 -565
  1111. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1112. teradataml/analytics/mle/ScaleMap.py +0 -378
  1113. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1114. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1115. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1116. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1117. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1118. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1119. teradataml/analytics/mle/Sessionize.py +0 -475
  1120. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1121. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1122. teradataml/analytics/mle/TF.py +0 -389
  1123. teradataml/analytics/mle/TFIDF.py +0 -504
  1124. teradataml/analytics/mle/TextChunker.py +0 -414
  1125. teradataml/analytics/mle/TextClassifier.py +0 -399
  1126. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1127. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1128. teradataml/analytics/mle/TextMorph.py +0 -494
  1129. teradataml/analytics/mle/TextParser.py +0 -623
  1130. teradataml/analytics/mle/TextTagger.py +0 -530
  1131. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1132. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1133. teradataml/analytics/mle/Unpack.py +0 -526
  1134. teradataml/analytics/mle/Unpivot.py +0 -438
  1135. teradataml/analytics/mle/VarMax.py +0 -776
  1136. teradataml/analytics/mle/VectorDistance.py +0 -762
  1137. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1138. teradataml/analytics/mle/XGBoost.py +0 -842
  1139. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1140. teradataml/analytics/mle/__init__.py +0 -123
  1141. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1142. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1143. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1144. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1145. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1146. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1147. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1148. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1149. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1150. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1151. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1152. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1153. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1154. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1155. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1156. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1157. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1158. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1159. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1160. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1161. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1162. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1163. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1164. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1165. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1166. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1167. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1168. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1169. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1170. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1171. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1172. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1173. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1174. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1175. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1176. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1177. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1178. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1179. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1180. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1181. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1182. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1183. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1184. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1185. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1186. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1187. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1188. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1189. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1190. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1191. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1192. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1193. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1194. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1195. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1196. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1197. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1198. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1199. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1200. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1201. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1202. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1203. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1204. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1205. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1206. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1207. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1208. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1209. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1210. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1211. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1212. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1213. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1214. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1215. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1216. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1217. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1218. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1219. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1220. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1221. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1222. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1223. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1224. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1225. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1226. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1227. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1228. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1229. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1230. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1231. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1232. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1233. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1234. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1235. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1236. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1237. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1238. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1239. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1240. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1241. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1242. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1243. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1244. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1245. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1246. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1247. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1248. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1249. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1250. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1251. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1252. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1253. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1254. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1255. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1256. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1257. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1258. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1259. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1260. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1261. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1262. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1263. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1264. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1265. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1266. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1267. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1268. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1269. teradataml/analytics/sqle/Antiselect.py +0 -321
  1270. teradataml/analytics/sqle/Attribution.py +0 -603
  1271. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1272. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1273. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1274. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1275. teradataml/analytics/sqle/NPath.py +0 -632
  1276. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1277. teradataml/analytics/sqle/Pack.py +0 -388
  1278. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1279. teradataml/analytics/sqle/Sessionize.py +0 -390
  1280. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1281. teradataml/analytics/sqle/Unpack.py +0 -503
  1282. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1283. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1284. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1285. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1286. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1287. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1288. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1289. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1290. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1291. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1292. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1293. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1294. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1295. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1296. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1297. teradataml/catalog/model_cataloging.py +0 -980
  1298. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1299. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1300. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1301. teradataml/table_operators/sandbox_container_util.py +0 -643
  1302. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1303. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/teradataml/automl/feature_engineering.py
@@ -0,0 +1,1580 @@
+ # ##################################################################
+ #
+ # Copyright 2024 Teradata. All rights reserved.
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
+ #
+ # Primary Owner: Sweta Shaw
+ # Email Id: Sweta.Shaw@Teradata.com
+ #
+ # Secondary Owner: Akhil Bisht
+ # Email Id: AKHIL.BISHT@Teradata.com
+ #
+ # Version: 1.1
+ # Function Version: 1.0
+ # ##################################################################
+
+ # Python libraries
+ import pandas as pd
+ import time
+ import json
+ import re
+
+ # Teradata libraries
+ from teradataml.dataframe.dataframe import DataFrame
+ from teradataml.dataframe.copy_to import copy_to_sql
+ from teradataml import Antiselect
+ from teradataml import BincodeFit, BincodeTransform
+ from teradataml import CategoricalSummary, ColumnSummary, ConvertTo, GetFutileColumns, FillRowId
+ from teradataml import Fit, Transform
+ from teradataml import NonLinearCombineFit, NonLinearCombineTransform
+ from teradataml import NumApply
+ from teradataml import OneHotEncodingFit, OneHotEncodingTransform
+ from teradataml import OrdinalEncodingFit, OrdinalEncodingTransform
+ from teradataml import SimpleImputeFit, SimpleImputeTransform
+ from teradataml import StrApply
+ from teradataml import TargetEncodingFit, TargetEncodingTransform
+ from sqlalchemy import literal_column
+ from teradatasqlalchemy import INTEGER
+ from teradataml import display
+ from teradataml.common.garbagecollector import GarbageCollector
+ from teradataml.dataframe.sql_functions import case
+ from teradataml.hyperparameter_tuner.utils import _ProgressBar
+ from teradataml.utils.validators import _Validators
+
+
+ class _FeatureEngineering:
+
+     def __init__(self,
+                  data,
+                  target_column,
+                  model_list,
+                  verbose=0,
+                  task_type="Regression",
+                  custom_data=None):
+         """
+         DESCRIPTION:
+             Function initializes the data, target column, and column data types
+             for feature engineering.
+
+         PARAMETERS:
+             data:
+                 Required Argument.
+                 Specifies the input teradataml DataFrame for feature engineering.
+                 Types: teradataml DataFrame
+
+             target_column:
+                 Required Argument.
+                 Specifies the name of the target column in "data".
+                 Types: str
+
+             model_list:
+                 Required Argument.
+                 Specifies the list of models to be used for model training.
+                 Types: list
+
+             verbose:
+                 Optional Argument.
+                 Specifies the level of detail in the displayed execution steps.
+                 Default Value: 0
+                 Permitted Values:
+                     * 0: prints the progress bar and leaderboard.
+                     * 1: prints the execution steps of AutoML.
+                     * 2: prints the intermediate data between the execution of each step of AutoML.
+                 Types: int
+
+             task_type:
+                 Required Argument.
+                 Specifies the task type for AutoML, i.e., whether to apply regression or
+                 classification on the provided dataset.
+                 Default Value: "Regression"
+                 Permitted Values: "Regression", "Classification"
+                 Types: str
+
+             custom_data:
+                 Optional Argument.
+                 Specifies a JSON object containing user-customized input.
+                 Types: json object
+         """
+         # Instance variables
+         self.data = data
+         self.target_column = target_column
+         self.model_list = model_list
+         self.verbose = verbose
+         self.task_type = task_type
+         self.custom_data = custom_data
+         self.excluded_cols = []
+         self.data_types = {key: value for key, value in self.data._column_names_and_types}
+         self.target_label = None
+         self.data_transform_dict = {}
+         self.one_hot_obj_count = 0
+         self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
+
+     # Method for performing feature engineering on data -> adding an id column,
+     # removing futile columns, imputation, encoding (one-hot)
+     def feature_engineering(self,
+                             auto=True):
+         """
+         DESCRIPTION:
+             Function performs the following operations:
+                 1. Removes futile columns/features from the dataset.
+                 2. Detects the columns with missing values.
+                 3. Performs imputation on the columns with missing values.
+                 4. Detects categorical columns and performs encoding on those columns.
+
+         PARAMETERS:
+             auto:
+                 Optional Argument.
+                 Specifies whether to run AutoML in custom mode or auto mode.
+                 When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
+                 Default Value: True
+                 Types: boolean
+
+         RETURNS:
+             tuple, where the first element is a teradataml DataFrame and the second
+             element is the list of columns that do not participate in outlier
+             transformation.
+         """
+         # Assigning number of base jobs for progress bar.
+         base_jobs = 14 if auto else 18
+
+         # Updating model list based on the distinct values of the target column for classification type
+         if self.is_classification_type():
+             if self.data.drop_duplicate(self.target_column).size > 2:
+                 unsupported_models = ['svm', 'glm']
+                 self.model_list = [model for model in self.model_list if model not in unsupported_models]
+
+         # Updating number of jobs for progress bar based on number of models.
+         jobs = base_jobs + len(self.model_list)
+         self.progress_bar = _ProgressBar(jobs=jobs, verbose=2, prefix='Automl Running:')
+
+         self._display_heading(phase=1,
+                               progress_bar=self.progress_bar)
+         self._display_msg(msg='Feature Engineering started ...',
+                           progress_bar=self.progress_bar)
+
+         # Storing target column in the data transform dictionary
+         self.data_transform_dict['data_target_column'] = self.target_column
+         # Storing target column encoding indicator in the data transform dictionary
+         self.data_transform_dict['target_col_encode_ind'] = False
+         # Storing task type in the data transform dictionary
+         self.data_transform_dict['classification_type'] = self.is_classification_type()
+         # Storing params for performing one-hot encoding
+         self.data_transform_dict['one_hot_encoding_fit_obj'] = {}
+         self.data_transform_dict['one_hot_encoding_drop_list'] = []
+
+         if auto:
+             self._remove_duplicate_rows()
+             self.progress_bar.update()
+
+             self._remove_futile_columns()
+             self.progress_bar.update()
+
+             self._handle_date_columns()
+             self.progress_bar.update()
+
+             self._handling_missing_value()
+             self.progress_bar.update()
+
+             self._impute_missing_value()
+             self.progress_bar.update()
+
+             self._encoding_categorical_columns()
+             self.progress_bar.update()
+
+         else:
+             self._remove_duplicate_rows()
+             self.progress_bar.update()
+
+             self._remove_futile_columns()
+             self.progress_bar.update()
+
+             self._handle_date_columns()
+             self.progress_bar.update()
+
+             self._custom_handling_missing_value()
+             self.progress_bar.update()
+
+             self._bin_code_transformation()
+             self.progress_bar.update()
+
+             self._string_manipulation()
+             self.progress_bar.update()
+
+             self._custom_categorical_encoding()
+             self.progress_bar.update()
+
+             self._mathematical_transformation()
+             self.progress_bar.update()
+
+             self._non_linear_transformation()
+             self.progress_bar.update()
+
+             self._anti_select_columns()
+             self.progress_bar.update()
+
+         return self.data, self.excluded_cols, self.target_label, self.data_transform_dict
+
+     def _extract_list(self,
+                       list1,
+                       list2):
+         """
+         DESCRIPTION:
+             Function to extract elements from list1 which are not present in list2.
+
+         PARAMETERS:
+             list1:
+                 Required Argument.
+                 Specifies the first list, from which elements are extracted.
+                 Types: list
+
+             list2:
+                 Required Argument.
+                 Specifies the second list, whose elements are excluded from the
+                 first list while extracting.
+                 Types: list
+
+         RETURNS:
+             The extracted elements in the form of a list.
+         """
+         new_lst = list(set(list1) - set(list2))
+         return new_lst
+
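Since this helper is plain set arithmetic, its behavior is easy to pin down; note that the set round-trip drops duplicates and does not preserve element order, which is acceptable here because callers use it to build "accumulate" column lists. A quick illustration:

    # Illustration only: set difference with unordered result.
    list1 = ["id", "price", "rooms", "zip"]
    list2 = ["price"]
    print(list(set(list1) - set(list2)))  # e.g. ['zip', 'rooms', 'id'] (order not guaranteed)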
+     def _remove_duplicate_rows(self):
+         """
+         DESCRIPTION:
+             Function to handle duplicate rows present in the dataset.
+         """
+         self._display_msg(msg="\nHandling duplicate records present in dataset ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+         rows = self.data.shape[0]
+         self.data = self.data.drop_duplicate()
+         if rows != self.data.shape[0]:
+             self._display_msg(msg=f'Updated dataset sample after removing {rows-self.data.shape[0]} duplicate records:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+             self._display_msg(inline_msg=f"Remaining Rows in the data: {self.data.shape[0]}\n"\
+                                          f"Remaining Columns in the data: {self.data.shape[1]}",
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Analysis completed. No action taken.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle duplicate records: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _get_distinct_count(self):
+         """
+         DESCRIPTION:
+             Function to get the distinct count for all features and store it in a
+             dictionary for further use.
+         """
+         # Count of distinct values in each column
+         counts = self.data.select(self.data.columns).count(distinct=True)
+
+         # Dict containing the distinct value count of each column
+         self.counts_dict = next(counts.itertuples())._asdict()
+
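count(distinct=True) produces a single row whose columns are named count_<column>, and reading that row with itertuples()._asdict() folds it into a plain dict keyed the same way, which is how later steps index it. A sketch of the resulting shape, with hypothetical column names and counts:

    # Shape of self.counts_dict after _get_distinct_count(), assuming input
    # columns "id", "price" and "zip" (names and counts hypothetical):
    counts_dict = {"count_id": 10000, "count_price": 2315, "count_zip": 42}
    # Later steps look it up per column, e.g.:
    col = "zip"
    distinct_values = counts_dict[f"count_{col}"]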
+     def _preprocess_data(self):
+         """
+         DESCRIPTION:
+             Function replaces the existing id column or adds a new id column, and
+             removes columns with a single/constant value from the dataset.
+         """
+         # Get distinct value count of each column
+         self._get_distinct_count()
+
+         # Columns to be removed if an id column is detected
+         # or the count of distinct values = 1
+         columns_to_be_removed = [col for col in self.data.columns if col.lower() == 'id' or self.counts_dict[f'count_{col}'] == 1]
+
+         # Removing id column, if it exists
+         if len(columns_to_be_removed) != 0:
+             self.data = self.data.drop(columns_to_be_removed, axis=1)
+             # Storing irrelevant column list in the data transform dictionary
+             self.data_transform_dict['drop_irrelevent_columns'] = columns_to_be_removed
+
+         # Adding id column
+         obj = FillRowId(data=self.data, row_id_column='id')
+
+         self.data = obj.result
+
+     def _remove_futile_columns(self):
+         """
+         DESCRIPTION:
+             Function removes the futile columns from the dataset.
+         """
+         self._display_msg(msg="\nHandling less significant features from data ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         self._preprocess_data()
+
+         # Handling string type target column in classification
+         # by performing ordinal encoding
+         if self.data_types[self.target_column] in ['str']:
+             self._ordinal_encoding([self.target_column])
+
+         # Detecting categorical columns
+         categorical_columns = [col for col, d_type in self.data._column_names_and_types if d_type == 'str']
+
+         # Detecting and removing futile columns, if categorical columns exist
+         if len(categorical_columns) != 0:
+
+             obj = CategoricalSummary(data=self.data,
+                                      target_columns=categorical_columns)
+
+             gfc_out = GetFutileColumns(data=self.data,
+                                        object=obj,
+                                        category_summary_column="ColumnName",
+                                        threshold_value=0.7)
+
+             # Extracting futile columns
+             f_cols = [row[0] for row in gfc_out.result.itertuples()]
+
+             if len(f_cols) == 0:
+                 self._display_msg(inline_msg="Analysis indicates all categorical columns are significant. No action needed.",
+                                   progress_bar=self.progress_bar)
+             else:
+
+                 self.data = self.data.drop(f_cols, axis=1)
+                 # Storing futile column list in the data transform dictionary
+                 self.data_transform_dict['futile_columns'] = f_cols
+                 self._display_msg(msg='Removing Futile columns:',
+                                   col_lst=f_cols,
+                                   progress_bar=self.progress_bar)
+                 self._display_msg(msg='Sample of Data after removing Futile columns:',
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle less significant features: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
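GetFutileColumns flags categorical columns that carry little modeling signal; its exact in-database rule is documented with the SQLE function, and the 0.7 threshold above is the knob this code fixes. As a rough local approximation of the idea, a categorical column whose values are almost all distinct, or almost all identical, is futile. A pandas sketch of that heuristic, not the function's exact rule:

    # Rough pandas approximation of "futile" categorical columns (illustration).
    import pandas as pd

    def futile_categoricals(df: pd.DataFrame, threshold: float = 0.7) -> list:
        futile = []
        for col in df.select_dtypes(include="object").columns:
            n = df[col].notna().sum()
            if n == 0:
                continue
            distinct_ratio = df[col].nunique() / n          # ~1.0 -> nearly all unique
            top_ratio = df[col].value_counts().iloc[0] / n  # ~1.0 -> nearly constant
            if distinct_ratio > threshold or top_ratio > threshold:
                futile.append(col)
        return futile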
+     def _fetch_date_component(self):
+         """
+         DESCRIPTION:
+             Function to fetch the day of week, week of month, month of quarter, and
+             quarter of year components from date columns. It generates weekend and
+             month-half details from the day of week and week of month components
+             respectively, and converts the quarter of year and month of quarter
+             component columns to VARCHAR.
+
+         RETURNS:
+             List of newly generated date component features.
+         """
+         # List for storing newly generated date component features
+         new_date_components = []
+         # Extracting weekend, month, and quarter information from date columns
+         date_component_param = {}
+         for col in self.date_column_list:
+             # Generating new column names for extracted date components
+             weekend_col = f'{col}_weekend'
+             month_half_col = f'{col}_month_half'
+             month_of_quarter_col = f'{col}_month_of_quarter'
+             quarter_of_year_col = f'{col}_quarter_of_year'
+
+             date_component_param = {
+                 **date_component_param,
+                 weekend_col: case([(self.data[col].day_of_week().isin([1, 7]), 'yes')], else_='no'),
+                 month_half_col: case([(self.data[col].week_of_month().isin([1, 2]), 'first_half')], else_='second_half'),
+                 month_of_quarter_col: self.data[col].month_of_quarter(),
+                 quarter_of_year_col: self.data[col].quarter_of_year()
+             }
+             # Storing the newly generated date component columns.
+             # The day of week and week of month components are not kept as-is;
+             # they are only used to derive the weekend and month-half details.
+             new_date_components.extend([weekend_col, month_half_col, month_of_quarter_col, quarter_of_year_col])
+         # Adding new date component columns to the dataset
+         self.data = self.data.assign(**date_component_param)
+         # Dropping date columns, as their component columns have been extracted.
+         self.data = self.data.drop(self.date_column_list, axis=1)
+
+         # Converting the remaining component columns to VARCHAR
+         # so that they are treated as categorical columns
+         remaining_component_columns = [col for col in self.data.columns if re.search('(month_of_quarter|quarter_of_year)$', col)]
+         accumulate_columns = self._extract_list(self.data.columns, remaining_component_columns)
+         convertto_params = {
+             "data": self.data,
+             "target_columns": remaining_component_columns,
+             "target_datatype": ["VARCHAR(charlen=20,charset=UNICODE,casespecific=NO)"],
+             "accumulate": accumulate_columns,
+             "persist": True
+         }
+         # Updating dataset after converting the component columns to VARCHAR
+         self.data = ConvertTo(**convertto_params).result
+         # Adding the table containing transformed data to the garbage collector
+         GarbageCollector._add_to_garbagecollector(self.data._table_name)
+         return new_date_components
+
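The weekend flag above is built with teradataml's case() over the day_of_week() column accessor; in Teradata's numbering 1 is Sunday and 7 is Saturday, which is why isin([1, 7]) marks the weekend. A minimal standalone sketch of the same pattern, where the table name "orders" and column "order_date" are hypothetical:

    # Sketch: derive a categorical weekend flag from a date column.
    from teradataml import DataFrame
    from teradataml.dataframe.sql_functions import case

    df = DataFrame("orders")
    df = df.assign(order_date_weekend=case(
        [(df.order_date.day_of_week().isin([1, 7]), 'yes')],
        else_='no'))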
+     def _handle_date_columns_helper(self):
+         """
+         DESCRIPTION:
+             Function for dropping irrelevant date features, extracting the different
+             components from the relevant date features, and transforming them.
+         """
+
+         # Dropping missing values for all date columns
+         self._display_msg(msg="\nDropping missing values for:",
+                           col_lst=self.date_column_list,
+                           progress_bar=self.progress_bar)
+
+         self.data = self.data.dropna(subset=self.date_column_list)
+
+         # List of date columns eligible for dropping from the dataset
+         drop_date_cols = []
+
+         # Checking for date columns with only unique values
+         for col in self.date_column_list:
+             if self.data.drop_duplicate(col).size == self.data.shape[0]:
+                 drop_date_cols.append(col)
+
+         if len(drop_date_cols) != 0:
+             self.data = self.data.drop(drop_date_cols, axis=1)
+             # Storing unique date column list in the data transform dictionary
+             self.data_transform_dict['drop_unique_date_columns'] = drop_date_cols
+             self._display_msg(msg='Dropping date features with all unique values:',
+                               col_lst=drop_date_cols,
+                               progress_bar=self.progress_bar)
+         # Updated date column list after dropping irrelevant date columns
+         self.date_column_list = [item for item in self.date_column_list if item not in drop_date_cols]
+
+         if len(self.date_column_list) != 0:
+
+             # List for storing newly generated date component features
+             new_columns = self._fetch_date_component()
+             self._display_msg(msg='List of newly generated features from existing date features:',
+                               col_lst=new_columns,
+                               progress_bar=self.progress_bar)
+             # Dropping columns with all unique values or a single value
+             drop_cols = []
+             for col in new_columns:
+                 distinct_rows = self.data.drop_duplicate(col).size
+                 if distinct_rows == self.data.shape[0]:
+                     drop_cols.append(col)
+                     self._display_msg(msg='Dropping features with all unique values:',
+                                       col_lst=col,
+                                       progress_bar=self.progress_bar)
+
+                 elif distinct_rows == 1:
+                     drop_cols.append(col)
+                     self._display_msg(msg='Dropping features with single value:',
+                                       col_lst=col,
+                                       progress_bar=self.progress_bar)
+
+             # Dropping the columns in the drop_cols list
+             if len(drop_cols) != 0:
+                 self.data = self.data.drop(drop_cols, axis=1)
+                 # Storing the list of extracted date components to drop in the data transform dictionary
+                 self.data_transform_dict['drop_extract_date_columns'] = drop_cols
+                 # Extracting all newly generated columns
+                 new_columns = [item for item in new_columns if item not in drop_cols]
+
+             self._display_msg(msg='Updated list of newly generated features from existing date features:',
+                               col_lst=new_columns,
+                               progress_bar=self.progress_bar)
+
+             self._display_msg(msg='Updated dataset sample after handling date features:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="No useful date feature found.",
+                               progress_bar=self.progress_bar)
+
+     def _handle_date_columns(self):
+         """
+         DESCRIPTION:
+             Function to handle date columns in the dataset, if any, and perform the
+             relevant transformation by extracting their different components, e.g.,
+             day, month, and year.
+         """
+         self._display_msg(msg="\nHandling Date Features ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         self.date_column_list = [col for col, d_type in self.data._column_names_and_types \
+                                  if d_type in ["datetime.date", "datetime.datetime"]]
+
+         if len(self.date_column_list) == 0:
+             self._display_msg(inline_msg="Analysis Completed. Dataset does not contain any feature related to dates. No action needed.",
+                               progress_bar=self.progress_bar)
+         else:
+             # Storing date column list in the data transform dictionary
+             self.data_transform_dict['date_columns'] = self.date_column_list
+             self._handle_date_columns_helper()
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle date features: {:.2f} sec\n".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _missing_count_per_column(self):
+         """
+         DESCRIPTION:
+             Function finds and returns a dictionary of the columns
+             with missing values.
+
+         RETURNS:
+             dict, where keys represent column names and
+             values represent the missing value count of the corresponding column.
+         """
+
+         # Removing rows with a missing target column value
+         self.data = self.data.dropna(subset=[self.target_column])
+
+         obj = ColumnSummary(data=self.data,
+                             target_columns=self.data.columns)
+
+         cols_miss_val = {}
+         # Iterating over each row in the column summary result
+         for row in obj.result.itertuples():
+             # Checking if the third element of the row (missing value count) is greater than 0
+             if row[3] > 0:
+                 # If so, add an entry to the 'cols_miss_val' dictionary
+                 # Key: column name (first element of the row)
+                 # Value: count of missing values in the column (third element of the row)
+                 cols_miss_val[row[0]] = row[3]
+
+         return cols_miss_val
+
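ColumnSummary does this profiling in-database; the dictionary it is folded into is equivalent to the following local pandas computation (illustration only):

    # Local pandas equivalent of the dictionary built above (illustration).
    import pandas as pd

    def missing_count_per_column(df: pd.DataFrame, target_column: str) -> dict:
        df = df.dropna(subset=[target_column])  # rows without a label are dropped first
        counts = df.isna().sum()                # missing count per column
        return {col: int(n) for col, n in counts.items() if n > 0}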
+     def _handling_missing_value(self):
+         """
+         DESCRIPTION:
+             Function detects the missing values in each feature of the dataset,
+             then performs one of these operations based on the conditions below:
+                 1. deleting rows with missing values in a column/feature
+                 2. dropping columns from the dataset
+         """
+         self._display_msg(msg="\nChecking Missing values in dataset ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         # Flag for missing values
+         msg_val_found = 0
+
+         # Number of rows
+         d_size = self.data.shape[0]
+
+         delete_rows = []
+         drop_cols = []
+         self.imputation_cols = {}
+
+         cols_miss_val = self._missing_count_per_column()
+
+         if len(cols_miss_val) != 0:
+             self._display_msg(msg="Columns with their missing values:",
+                               col_lst=cols_miss_val,
+                               progress_bar=self.progress_bar)
+
+             # Get distinct value count of each column
+             self._get_distinct_count()
+
+             # Iterating over columns with missing values
+             for col, val in cols_miss_val.items():
+
+                 # Drop column, if count of missing values > 60%
+                 if val > .6*d_size:
+                     drop_cols.append(col)
+                     continue
+
+                 if self.data_types[col] in ['float', 'int']:
+                     corr_df = self.data[col].corr(self.data[self.target_column])
+                     corr_val = self.data.assign(True, corr_=corr_df)
+                     related = next(corr_val.itertuples())[0]
+
+                     # Delete rows, if count of missing values < 2% and
+                     # correlation between target column and numeric column <= .25
+                     if val < .02*d_size and related <= .25:
+                         delete_rows.append(col)
+                         continue
+
+                 elif self.data_types[col] in ['str']:
+                     # Delete rows, if count of missing values < 4%
+                     if val < .04*d_size:
+                         delete_rows.append(col)
+                         continue
+                     # Drop column, if unique count of column > 75%
+                     elif self.counts_dict[f'count_{col}'] > .75*(d_size-val):
+                         drop_cols.append(col)
+                         continue
+
+                 # Remaining columns are kept for imputation
+                 self.imputation_cols[col] = val
+             # Storing columns with missing values for imputation in the data transform dictionary
+             self.data_transform_dict['imputation_columns'] = self.imputation_cols
+
+             if len(delete_rows) != 0:
+                 rows = self.data.shape[0]
+                 self.data = self.data.dropna(subset=delete_rows)
+                 msg_val_found = 1
+                 self._display_msg(msg='Deleting rows of these columns for handling missing values:',
+                                   col_lst=delete_rows,
+                                   progress_bar=self.progress_bar)
+                 self._display_msg(msg=f'Sample of dataset after removing {rows-self.data.shape[0]} rows:',
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+
+             if len(drop_cols) != 0:
+                 self.data = self.data.drop(drop_cols, axis=1)
+                 msg_val_found = 1
+                 # Storing columns with missing values to drop in the data transform dictionary
+                 self.data_transform_dict['drop_missing_columns'] = drop_cols
+                 self._display_msg(msg='Dropping these columns for handling missing values:',
+                                   col_lst=drop_cols,
+                                   progress_bar=self.progress_bar)
+                 self._display_msg(msg=f'Sample of dataset after removing {len(drop_cols)} columns:',
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+
+         if len(self.imputation_cols) == 0 and msg_val_found == 0:
+             self._display_msg(inline_msg="Analysis Completed. No Missing Values Detected.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to find missing values in data: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
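The branch above encodes a small decision table. A pure-Python restatement of the rules, with the thresholds copied from the code ("related" stands for the column-to-target correlation it computes):

    # Pure-Python restatement of the per-column decision above (illustration).
    def missing_value_action(dtype, missing, total, distinct, related=0.0):
        if missing > 0.60 * total:
            return "drop_column"                  # mostly empty column
        if dtype in ("float", "int") and missing < 0.02 * total and related <= 0.25:
            return "delete_rows"                  # few gaps, weakly related to target
        if dtype == "str":
            if missing < 0.04 * total:
                return "delete_rows"
            if distinct > 0.75 * (total - missing):
                return "drop_column"              # high-cardinality string column
        return "impute"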
+     def _impute_helper(self):
+         """
+         DESCRIPTION:
+             Function decides the imputation method [mean/median/mode] for columns with
+             missing values on the basis of the skewness of each column in the dataset.
+
+         RETURNS:
+             A tuple containing,
+                 col_stat (names of columns with missing values)
+                 stat (imputation method for the respective columns)
+         """
+         col_stat = []
+         stat = []
+
+         # Converting the output of skew() into a dictionary with the column name
+         # as key and the skewness value as value
+         df = self.data.skew()
+         skew_data = next(df.itertuples())._asdict()
+
+         # Iterating over columns with missing values
+         for key, val in self.imputation_cols.items():
+
+             col_stat.append(key)
+             if self.data_types[key] in ['float', 'int']:
+                 val = skew_data[f'skew_{key}']
+                 # Median imputation method, if abs(skewness value) > 1
+                 if abs(val) > 1:
+                     stat.append('median')
+                 # Mean imputation method, if abs(skewness value) <= 1
+                 else:
+                     stat.append('mean')
+             # Mode imputation method, if categorical column
+             else:
+                 stat.append('mode')
+
+         self._display_msg(msg="Columns with their imputation method:",
+                           col_lst=dict(zip(col_stat, stat)),
+                           progress_bar=self.progress_bar)
+
+         return col_stat, stat
+
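The mean/median switch is the usual robustness argument: for a strongly skewed column the mean is dragged toward the tail, so the median is the safer fill value. The same rule in pandas, as an illustration only:

    # Skewness-based choice of fill value, mirroring the rule above (pandas).
    import pandas as pd

    s = pd.Series([1, 1, 2, 2, 3, 250, None])  # long right tail
    method = "median" if abs(s.skew()) > 1 else "mean"
    s_filled = s.fillna(s.median() if method == "median" else s.mean())
    print(method)  # "median" for this sample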
+     def _impute_missing_value(self):
+         """
+         DESCRIPTION:
+             Function performs the imputation on columns/features with missing values in the dataset.
+         """
+
+         start_time = time.time()
+         self._display_msg(msg="\nImputing Missing Values ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+         if len(self.imputation_cols) != 0:
+
+             # Lists of columns and their imputation methods
+             col_stat, stat = self._impute_helper()
+
+             fit_obj = SimpleImputeFit(data=self.data,
+                                       stats_columns=col_stat,
+                                       stats=stat)
+
+             # Storing fit object for imputation in the data transform dictionary
+             self.data_transform_dict['imputation_fit_object'] = fit_obj.output
+             sm = SimpleImputeTransform(data=self.data,
+                                        object=fit_obj)
+
+             self.data = sm.result
+             self._display_msg(msg="Sample of dataset after Imputation:",
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Analysis completed. No imputation required.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Time taken to perform imputation: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+
+     def _custom_handling_missing_value(self):
+         """
+         DESCRIPTION:
+             Function to perform customized missing value handling on features based on user input.
+         """
+         # Fetching user input for performing missing value handling
+         missing_handling_input = self.custom_data.get("MissingValueHandlingIndicator", False)
+
+         if missing_handling_input:
+             # Fetching the parameters required for performing missing value handling
+             missing_handling_param = self.custom_data.get("MissingValueHandlingParam", None)
+             if missing_handling_param:
+                 # Fetching user input for the different missing value handling methods
+                 drop_col_ind = missing_handling_param.get("DroppingColumnIndicator", False)
+                 drop_row_ind = missing_handling_param.get("DroppingRowIndicator", False)
+                 impute_ind = missing_handling_param.get("ImputeMissingIndicator", False)
+                 # Checking whether all method indicators in the user input are false
+                 if not any([drop_col_ind, drop_row_ind, impute_ind]):
+                     self._display_msg(inline_msg="No method information provided for performing customized missing value handling. \
+                                       AutoML will proceed with default missing value handling method.",
+                                       progress_bar=self.progress_bar)
+
+                 else:
+                     # Checking user input for dropping columns with missing values
+                     if drop_col_ind:
+                         drop_col_list = missing_handling_param.get("DroppingColumnList", [])
+                         # Storing custom columns with missing values to drop in the data transform dictionary
+                         self.data_transform_dict["custom_drop_missing_columns"] = drop_col_list
+                         if len(drop_col_list):
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(drop_col_list, "DroppingColumnList", self.data, "df")
+
+                             self._display_msg(msg="\nDropping these columns for handling customized missing value:",
+                                               col_lst=drop_col_list,
+                                               progress_bar=self.progress_bar)
+                             self.data = self.data.drop(drop_col_list, axis=1)
+                         else:
+                             self._display_msg(inline_msg="No information provided for dropping missing value containing columns.",
+                                               progress_bar=self.progress_bar)
+
+                     # Checking user input for dropping rows with missing values
+                     if drop_row_ind:
+                         drop_row_list = missing_handling_param.get("DroppingRowList", [])
+                         if len(drop_row_list):
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(drop_row_list, "DroppingRowList", self.data, "df")
+
+                             self._display_msg(msg="Dropping missing rows in these columns for handling customized missing value:",
+                                               col_lst=drop_row_list,
+                                               progress_bar=self.progress_bar)
+                             self.data = self.data.dropna(subset=drop_row_list)
+                         else:
+                             self._display_msg(inline_msg="No information provided for dropping missing value containing rows.",
+                                               progress_bar=self.progress_bar)
+                     # Checking user input for missing value imputation
+                     if impute_ind:
+                         stat_list = missing_handling_param.get("StatImputeList", None)
+                         stat_method = missing_handling_param.get("StatImputeMethod", None)
+                         literal_list = missing_handling_param.get("LiteralImputeList", None)
+                         literal_value = missing_handling_param.get("LiteralImputeValue", None)
+
+                         # Checking whether the columns are present in the dataset
+                         _Validators._validate_dataframe_has_argument_columns(stat_list, "StatImputeList", self.data, "df")
+
+                         _Validators._validate_dataframe_has_argument_columns(literal_list, "LiteralImputeList", self.data, "df")
+
+                         # Creating fit params
+                         fit_param = {
+                             "data": self.data,
+                             "stats_columns": stat_list,
+                             "stats": stat_method,
+                             "literals_columns": literal_list,
+                             "literals": literal_value
+                         }
+                         # Fitting on the dataset
+                         fit_obj = SimpleImputeFit(**fit_param)
+                         # Storing custom fit object for imputation in the data transform dictionary
+                         self.data_transform_dict["custom_imputation_ind"] = True
+                         self.data_transform_dict["custom_imputation_fit_object"] = fit_obj.output
+                         # Creating transform params
+                         transform_param = {
+                             "data": self.data,
+                             "object": fit_obj.output,
+                             "persist": True
+                         }
+                         # Updating dataset with the transform result
+                         self.data = SimpleImputeTransform(**transform_param).result
+                         # Adding the table containing transformed data to the garbage collector
+                         GarbageCollector._add_to_garbagecollector(self.data._table_name)
+                         self._display_msg(msg="Updated dataset sample after performing customized missing value imputation:",
+                                           data=self.data,
+                                           progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No information provided for performing customized missing value handling. \
+                                   AutoML will proceed with default missing value handling method.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Proceeding with default option for missing value imputation.",
+                               progress_bar=self.progress_bar)
+
+         # Proceeding with the default method for handling remaining missing values
+         self._display_msg(inline_msg="Proceeding with default option for handling remaining missing values.",
+                           progress_bar=self.progress_bar)
+         self._handling_missing_value()
+         self._impute_missing_value()
+
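All the keys read above come from the custom_data JSON. A minimal, hypothetical document that exercises the three branches, written as a Python dict; the column names are placeholders, while the key names are the ones the code reads:

    # Hypothetical custom_data document for customized missing value handling.
    custom_data = {
        "MissingValueHandlingIndicator": True,
        "MissingValueHandlingParam": {
            "DroppingColumnIndicator": True,
            "DroppingColumnList": ["comments"],        # drop these columns outright
            "DroppingRowIndicator": True,
            "DroppingRowList": ["age"],                # drop rows missing these columns
            "ImputeMissingIndicator": True,
            "StatImputeList": ["income", "city"],      # statistical imputation targets
            "StatImputeMethod": ["median", "mode"],
            "LiteralImputeList": ["segment"],          # constant-fill targets
            "LiteralImputeValue": ["unknown"],
        },
    }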
+     def _bin_code_transformation(self):
+         """
+         DESCRIPTION:
+             Function to perform customized binning on features based on user input.
+         """
+         # Fetching user input for performing bin-code transformation.
+         bin_code_input = self.custom_data.get("BincodeIndicator", False)
+
+         if bin_code_input:
+             # Storing custom bin-code transformation indicator in the data transform dictionary
+             self.data_transform_dict['custom_bincode_ind'] = True
+             # Fetching the details required for performing the transformation.
+             extracted_col = self.custom_data.get("BincodeParam", None)
+             if not extracted_col:
+                 self._display_msg(inline_msg="BincodeParam is empty. Skipping customized bincode transformation.",
+                                   progress_bar=self.progress_bar)
+             else:
+                 # Creating lists for storing column and binning information for performing the transformation
+                 equal_width_bin_list = []
+                 equal_width_bin_columns = []
+                 var_width_bin_list = []
+                 var_width_bin_columns = []
+
+                 # Checking whether the columns are present in the dataset
+                 _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "BincodeParam", self.data, "df")
+
+                 for col, transform_val in extracted_col.items():
+                     # Fetching the type of binning to be performed
+                     bin_trans_type = transform_val["Type"]
+                     # Fetching the number of bins to be created
+                     num_bin = transform_val["NumOfBins"]
+                     # Checking the bin type and adding details to the lists for binning
+                     if bin_trans_type == "Equal-Width":
+                         bins = num_bin
+                         equal_width_bin_list.append(bins)
+                         equal_width_bin_columns.append(col)
+                     elif bin_trans_type == "Variable-Width":
+                         var_width_bin_columns.append(col)
+                         bins = num_bin
+                         for i in range(1, bins+1):
+                             # Forming the bin name as per the expected input
+                             temp = "Bin_" + str(i)
+                             # Fetching the required details for variable-width binning
+                             minval = transform_val[temp]["min_value"]
+                             maxval = transform_val[temp]["max_value"]
+                             label = transform_val[temp]["label"]
+                             # Appending the information of each bin
+                             var_width_bin_list.append({"ColumnName": col, "MinValue": minval, "MaxValue": maxval, "Label": label})
+                 # Checking the column list for performing Equal-Width binning.
+                 if len(equal_width_bin_columns) != 0:
+                     # Adding fit parameters for performing Equal-Width binning.
+                     fit_params = {
+                         "data": self.data,
+                         "target_columns": equal_width_bin_columns,
+                         "method_type": "Equal-Width",
+                         "nbins": bins
+                     }
+                     eql_bin_code_fit = BincodeFit(**fit_params)
+                     # Storing fit object and column list for Equal-Width binning in the data transform dictionary
+                     self.data_transform_dict['custom_eql_bincode_col'] = equal_width_bin_columns
+                     self.data_transform_dict['custom_eql_bincode_fit_object'] = eql_bin_code_fit.output
+                     # Extracting accumulate columns
+                     accumulate_columns = self._extract_list(self.data.columns, equal_width_bin_columns)
+                     # Adding transform parameters for performing Equal-Width binning.
+                     eql_transform_params = {
+                         "data": self.data,
+                         "object": eql_bin_code_fit.output,
+                         "accumulate": accumulate_columns,
+                         "persist": True,
+                     }
+                     self.data = BincodeTransform(**eql_transform_params).result
+                     # Adding the table containing transformed data to the garbage collector
+                     GarbageCollector._add_to_garbagecollector(self.data._table_name)
+                     self._display_msg(msg="\nUpdated dataset sample after performing Equal-Width binning:",
+                                       data=self.data,
+                                       progress_bar=self.progress_bar)
+                 else:
+                     self._display_msg(inline_msg="No information provided for Equal-Width Transformation.",
+                                       progress_bar=self.progress_bar)
+
+                 if len(var_width_bin_columns) != 0:
+                     # Creating a pandas DataFrame and then a teradataml DataFrame for storing the binning information
+                     var_bin_table = pd.DataFrame(var_width_bin_list, columns=["ColumnName", "MinValue", "MaxValue", "Label"])
+                     self._display_msg(msg="Variable-Width binning information:",
+                                       data=var_bin_table,
+                                       progress_bar=self.progress_bar)
+                     copy_to_sql(df=var_bin_table, table_name="automl_bincode_var_fit", temporary=True)
+                     var_fit_input = DataFrame.from_table("automl_bincode_var_fit")
+                     fit_params = {
+                         "data": self.data,
+                         "fit_data": var_fit_input,
+                         "fit_data_order_column": ["MinValue", "MaxValue"],
+                         "target_columns": var_width_bin_columns,
+                         "minvalue_column": "MinValue",
+                         "maxvalue_column": "MaxValue",
+                         "label_column": "Label",
+                         "method_type": "Variable-Width",
+                         "label_prefix": "label_prefix"
+                     }
+                     var_bin_code_fit = BincodeFit(**fit_params)
+ # Storing fit object and column list for Variable-Width binning in data transform dictionary
930
+ self.data_transform_dict['custom_var_bincode_col'] = var_width_bin_columns
931
+ self.data_transform_dict['custom_var_bincode_fit_object'] = var_bin_code_fit.output
932
+ accumulate_columns = self._extract_list(self.data.columns, var_width_bin_columns)
933
+ var_transform_params = {
934
+ "data" : self.data,
935
+ "object" : var_bin_code_fit.output,
936
+ "object_order_column" : "TD_MinValue_BINFIT",
937
+ "accumulate" : accumulate_columns,
938
+ "persist" : True
939
+ }
940
+ self.data = BincodeTransform(**var_transform_params).result
941
+ # Adding transformed data containing table to garbage collector
942
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
943
+ self._display_msg(msg="Updated dataset sample after performing Variable-Width binning:",
944
+ data=self.data,
945
+ progress_bar=self.progress_bar)
946
+ else:
947
+ self._display_msg(inline_msg="No information provided for Variable-Width Transformation.",
948
+ progress_bar=self.progress_bar)
949
+ else:
950
+ self._display_msg(inline_msg="No information provided for Variable-Width Transformation.",
951
+ progress_bar=self.progress_bar)
952
+
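Editor's note: a sketch of the `BincodeParam` structure the loop above walks. The keys (`Type`, `NumOfBins`, and `Bin_<i>` entries with `min_value`/`max_value`/`label`) follow the lookups in the code; feature names and bounds are hypothetical.

    custom_data["BincodeParam"] = {
        "income": {"Type": "Equal-Width", "NumOfBins": 5},
        "age": {
            "Type": "Variable-Width", "NumOfBins": 2,
            "Bin_1": {"min_value": 0, "max_value": 40, "label": "young"},
            "Bin_2": {"min_value": 41, "max_value": 120, "label": "senior"}
        }
    }
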
+     def _string_manipulation(self):
+         """
+         DESCRIPTION:
+             Function to perform customized string manipulations on categorical features based on user input.
+
+         """
+         # Fetching user input for performing string manipulation.
+         str_mnpl_input = self.custom_data.get("StringManipulationIndicator", False)
+         # Checking user input for string manipulation on categorical features.
+         if str_mnpl_input:
+             # Storing custom string manipulation indicator in data transform dictionary
+             self.data_transform_dict['custom_string_manipulation_ind'] = True
+             # Fetching the details required for performing the operation.
+             extracted_col = self.custom_data.get("StringManipulationParam", None)
+             if not extracted_col:
+                 self._display_msg(inline_msg="No information provided for performing string manipulation.",
+                                   progress_bar=self.progress_bar)
+             else:
+                 # Checking whether the columns are present in the dataset
+                 _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "StringManipulationParam", self.data, "df")
+
+                 for target_col, transform_val in extracted_col.items():
+                     self.data = self._str_method_mapping(target_col, transform_val)
+                 # Storing custom string manipulation parameters in data transform dictionary
+                 self.data_transform_dict['custom_string_manipulation_param'] = extracted_col
+
+                 self._display_msg(msg="Updated dataset sample after performing string manipulation:",
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized string manipulation.",
+                               progress_bar=self.progress_bar)
+
+     def _str_method_mapping(self,
+                             target_col,
+                             transform_val):
+         """
+         DESCRIPTION:
+             Function to map customized parameters according to the passed method and
+             perform string manipulation on categorical features.
+
+         PARAMETERS:
+             target_col:
+                 Required Argument.
+                 Specifies the feature on which string manipulation is applied.
+                 Types: str
+
+             transform_val:
+                 Required Argument.
+                 Specifies the parameters required for applying string manipulation.
+                 Types: dict
+
+         RETURNS:
+             DataFrame containing transformed data after applying string manipulation.
+
+         """
+         # Creating the list of features to accumulate while performing string manipulation on the target feature
+         accumulate_columns = self._extract_list(self.data.columns, [target_col])
+
+         # Fetching required parameters from the JSON object
+         string_operation = transform_val["StringOperation"]
+
+         # Storing general parameters for performing string transformation
+         fit_params = {
+             "data" : self.data,
+             "target_columns" : target_col,
+             "string_operation" : string_operation,
+             "accumulate" : accumulate_columns,
+             "inplace" : True,
+             "persist" : True
+         }
+         # Adding additional parameters based on string operation type
+         if string_operation in ["StringCon", "StringTrim"]:
+             string_argument = transform_val["String"]
+             fit_params = {**fit_params,
+                           "string" : string_argument}
+         elif string_operation == "StringPad":
+             string_argument = transform_val["String"]
+             string_length = transform_val["StringLength"]
+             fit_params = {**fit_params,
+                           "string" : string_argument,
+                           "string_length" : string_length}
+         elif string_operation == "Substring":
+             string_index = transform_val["StartIndex"]
+             string_length = transform_val["StringLength"]
+             fit_params = {**fit_params,
+                           "start_index" : string_index,
+                           "string_length" : string_length}
+
+         # Returning dataset after performing string manipulation
+         transform_output = StrApply(**fit_params).result
+         # Adding the table containing transformed data to the garbage collector
+         GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+         return transform_output
+
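Editor's note: a sketch of `StringManipulationParam` entries as `_str_method_mapping` reads them. `StringOperation` selects the branch, and the extra keys per operation mirror the branches above; column names and values are hypothetical.

    custom_data["StringManipulationParam"] = {
        "name": {"StringOperation": "StringTrim", "String": " "},
        "code": {"StringOperation": "StringPad", "String": "0", "StringLength": 8},
        "sku": {"StringOperation": "Substring", "StartIndex": 1, "StringLength": 3}
    }
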
+     def _one_hot_encoding(self,
+                           one_hot_columns,
+                           unique_counts):
+         """
+         DESCRIPTION:
+             Function performs one hot encoding on categorical columns/features in the dataset.
+
+         PARAMETERS:
+             one_hot_columns:
+                 Required Argument.
+                 Specifies the categorical columns for which one hot encoding will be performed.
+                 Types: str or list of strings (str)
+
+             unique_counts:
+                 Required Argument.
+                 Specifies the unique counts in the categorical columns.
+                 Types: int or list of integers (int)
+
+         """
+         # The TD function adds an extra "<column>_other" column during one hot encoding,
+         # so initializing this list to remove those extra columns afterwards
+         drop_lst = [ele + "_other" for ele in one_hot_columns]
+         # Adding fit parameters for performing encoding
+         fit_params = {
+             "data" : self.data,
+             "approach" : "auto",
+             "is_input_dense" : True,
+             "target_column" : one_hot_columns,
+             "category_counts" : unique_counts,
+             "other_column" : "other"
+         }
+         # Performing one hot encoding fit on target columns
+         fit_obj = OneHotEncodingFit(**fit_params)
+         # Storing indicator, fit object and column drop list for one hot encoding in data transform dictionary
+         self.data_transform_dict['one_hot_encoding_ind'] = True
+         self.data_transform_dict['one_hot_encoding_fit_obj'].update({self.one_hot_obj_count : fit_obj.result})
+         self.data_transform_dict['one_hot_encoding_drop_list'].extend(drop_lst)
+         self.one_hot_obj_count = self.one_hot_obj_count + 1
+         # Adding transform parameters for performing encoding
+         transform_params = {
+             "data" : self.data,
+             "object" : fit_obj.result,
+             "is_input_dense" : True,
+             "persist" : True
+         }
+         # Performing one hot encoding transformation
+         transform_output = OneHotEncodingTransform(**transform_params).result
+         # Adding the table containing transformed data to the garbage collector
+         GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+         self.data = transform_output.drop(drop_lst, axis=1)
+
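Editor's note: the same fit/transform pair written standalone, to show where the dropped `<column>_other` columns come from. The `accounts` DataFrame and column name are assumptions; the keyword arguments mirror the dictionaries above.

    fit = OneHotEncodingFit(data=accounts, approach="auto", is_input_dense=True,
                            target_column=["state"], category_counts=[50],
                            other_column="other")
    encoded = OneHotEncodingTransform(data=accounts, object=fit.result,
                                      is_input_dense=True).result
    # Categories beyond the counted ones land in "state_other", which AutoML drops.
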
+     def _ordinal_encoding(self,
+                           ordinal_columns):
+         """
+         DESCRIPTION:
+             Function performs ordinal encoding on categorical columns or features in the dataset.
+
+         PARAMETERS:
+             ordinal_columns:
+                 Required Argument.
+                 Specifies the categorical columns for which ordinal encoding will be performed.
+                 Types: str or list of strings (str)
+         """
+         # Adding fit parameters for performing encoding
+         fit_params = {
+             "data" : self.data,
+             "target_column" : ordinal_columns
+         }
+         # Performing ordinal encoding fit on target columns
+         ord_fit_obj = OrdinalEncodingFit(**fit_params)
+         # Storing fit object and column list for ordinal encoding in data transform dictionary
+         if ordinal_columns[0] != self.target_column:
+             self.data_transform_dict["custom_ord_encoding_fit_obj"] = ord_fit_obj.result
+             self.data_transform_dict['custom_ord_encoding_col'] = ordinal_columns
+         else:
+             self.data_transform_dict['target_col_encode_ind'] = True
+             self.data_transform_dict['target_col_ord_encoding_fit_obj'] = ord_fit_obj.result
+         # Extracting accumulate columns
+         accumulate_columns = self._extract_list(self.data.columns, ordinal_columns)
+         # Adding transform parameters for performing encoding
+         transform_params = {
+             "data" : self.data,
+             "object" : ord_fit_obj.result,
+             "accumulate" : accumulate_columns,
+             "persist" : True
+         }
+         # Performing ordinal encoding transformation
+         self.data = OrdinalEncodingTransform(**transform_params).result
+         # Adding the table containing transformed data to the garbage collector
+         GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+         # Keeping the fit object as the label mapping when the target column itself was encoded
+         if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
+             self.target_label = ord_fit_obj
+
+     def _target_encoding(self,
+                          target_encoding_list):
+         """
+         DESCRIPTION:
+             Function performs target encoding on categorical columns/features in the dataset.
+
+         PARAMETERS:
+             target_encoding_list:
+                 Required Argument.
+                 Specifies a dictionary mapping each categorical column to its
+                 target encoding parameters.
+                 Types: dict
+         """
+         # Fetching all columns on which target encoding will be performed.
+         target_columns = list(target_encoding_list.keys())
+         # Checking whether the columns are present in the dataset
+         _Validators._validate_dataframe_has_argument_columns(target_columns, "TargetEncodingList", self.data, "df")
+         # Finding distinct values and counts for the columns.
+         cat_sum = CategoricalSummary(data=self.data,
+                                      target_columns=target_columns)
+         category_data = cat_sum.result.groupby("ColumnName").count()
+         category_data = category_data.assign(drop_columns=True,
+                                              ColumnName=category_data.ColumnName,
+                                              CategoryCount=category_data.count_DistinctValue)
+         # Storing indicator and fit object for target encoding in data transform dictionary
+         self.data_transform_dict["custom_target_encoding_ind"] = True
+         self.data_transform_dict["custom_target_encoding_fit_obj"] = {}
+         # Fetching required arguments for performing target encoding
+         for col, transform_val in target_encoding_list.items():
+             encoder_method = transform_val["encoder_method"]
+             response_column = transform_val["response_column"]
+             # Adding fit parameters for performing encoding
+             fit_params = {
+                 "data" : self.data,
+                 "category_data" : category_data,
+                 "encoder_method" : encoder_method,
+                 "target_columns" : col,
+                 "response_column" : response_column
+             }
+             if encoder_method == "CBM_DIRICHLET":
+                 num_distinct_responses = transform_val["num_distinct_responses"]
+                 fit_params = {**fit_params,
+                               "num_distinct_responses" : num_distinct_responses}
+             # Performing target encoding fit on target columns
+             tar_fit_obj = TargetEncodingFit(**fit_params)
+             # Storing each column's fit object for target encoding in data transform dictionary
+             self.data_transform_dict["custom_target_encoding_fit_obj"].update({col : tar_fit_obj})
+             # Extracting accumulate columns
+             accumulate_columns = self._extract_list(self.data.columns, [col])
+             # Adding transform parameters for performing encoding
+             transform_params = {
+                 "data" : self.data,
+                 "object" : tar_fit_obj,
+                 "accumulate" : accumulate_columns,
+                 "persist" : True
+             }
+             # Performing target encoding transformation
+             self.data = TargetEncodingTransform(**transform_params).result
+             # Adding the table containing transformed data to the garbage collector
+             GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
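Editor's note: a sketch of a `TargetEncodingList` as consumed by the loop above; `num_distinct_responses` is read only for the `CBM_DIRICHLET` method. Column and response names are hypothetical.

    encoding_list["TargetEncodingList"] = {
        "city": {"encoder_method": "CBM_BETA", "response_column": "churn"},
        "plan": {"encoder_method": "CBM_DIRICHLET", "response_column": "churn",
                 "num_distinct_responses": 2}
    }
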
+     def _encoding_categorical_columns(self):
+         """
+         DESCRIPTION:
+             Function detects the categorical columns and performs encoding on them.
+         """
+         self._display_msg(msg="\nPerforming encoding for categorical columns ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         ohe_col = []
+         unique_count = []
+
+         # List of columns before one hot encoding
+         col_bf_ohe = self.data.columns
+
+         # Get the distinct value count of each column
+         self._get_distinct_count()
+
+         # Detecting categorical columns with their unique counts
+         for col, d_type in self.data._column_names_and_types:
+             if d_type in ['str']:
+                 ohe_col.append(col)
+                 unique_count.append(self.counts_dict[f'count_{col}'])
+
+         if len(ohe_col) != 0:
+             self._one_hot_encoding(ohe_col, unique_count)
+
+             self._display_msg(msg="ONE HOT Encoding these Columns:",
+                               col_lst=ohe_col,
+                               progress_bar=self.progress_bar)
+             self._display_msg(msg="Sample of dataset after performing one hot encoding:",
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Analysis completed. No categorical columns were found.",
+                               progress_bar=self.progress_bar)
+
+         # List of columns after one hot encoding
+         col_af_ohe = self.data.columns
+
+         # List of columns excluded from outlier processing and scaling
+         self.excluded_cols = self._extract_list(col_af_ohe, col_bf_ohe)
+
+         end_time = time.time()
+         self._display_msg(msg="Time taken to encode the columns: {:.2f} sec".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _custom_categorical_encoding(self):
+         """
+         DESCRIPTION:
+             Function to perform specific encoding on the categorical columns based on user input.
+             If validation fails, default encoding is performed on all remaining categorical columns.
+         """
+         self._display_msg(msg="\nStarting Customized Categorical Feature Encoding ...",
+                           progress_bar=self.progress_bar)
+         cat_end_input = self.custom_data.get("CategoricalEncodingIndicator", False)
+         # Checking user input for categorical encoding
+         if cat_end_input:
+             # Storing custom categorical encoding indicator in data transform dictionary
+             self.data_transform_dict["custom_categorical_encoding_ind"] = True
+             # Fetching user input details for performing encoding
+             encoding_list = self.custom_data.get("CategoricalEncodingParam", None)
+             if encoding_list:
+                 onehot_encode_ind = encoding_list.get("OneHotEncodingIndicator", False)
+                 ordinal_encode_ind = encoding_list.get("OrdinalEncodingIndicator", False)
+                 target_encode_ind = encoding_list.get("TargetEncodingIndicator", False)
+                 # Checking whether any categorical encoding technique indicator is set
+                 if not any([onehot_encode_ind, ordinal_encode_ind, target_encode_ind]):
+                     self._display_msg(inline_msg="No information provided for any type of customized categorical encoding technique. "
+                                                  "AutoML will proceed with the default encoding technique.",
+                                       progress_bar=self.progress_bar)
+                 else:
+                     if onehot_encode_ind:
+                         unique_count = []
+                         ohe_list = encoding_list.get("OneHotEncodingList", None)
+                         # Checking for empty list
+                         if not ohe_list:
+                             self._display_msg(inline_msg="No information provided for the customized one hot encoding technique.",
+                                               progress_bar=self.progress_bar)
+                         else:
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(ohe_list, "OneHotEncodingList", self.data, "df")
+
+                             # Keeping track of existing columns before applying one hot encoding
+                             col_bf_ohe = self.data.columns
+                             # Detecting the unique counts of the categorical columns
+                             for col in ohe_list:
+                                 unique_count.append(self.data.drop_duplicate(col).size)
+                             # Performing one hot encoding
+                             self._one_hot_encoding(ohe_list, unique_count)
+                             # Keeping track of new columns after applying one hot encoding
+                             col_af_ohe = self.data.columns
+                             # Fetching the list of columns on which outlier processing should not be applied
+                             self.excluded_cols.extend(self._extract_list(col_af_ohe, col_bf_ohe))
+
+                             self._display_msg(msg="Updated dataset sample after performing one hot encoding:",
+                                               data=self.data,
+                                               progress_bar=self.progress_bar)
+
+                     if ordinal_encode_ind:
+                         ord_list = encoding_list.get("OrdinalEncodingList", None)
+                         # Checking for empty list
+                         if not ord_list:
+                             self._display_msg(inline_msg="No information provided for the customized ordinal encoding technique.",
+                                               progress_bar=self.progress_bar)
+                         else:
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(ord_list, "OrdinalEncodingList", self.data, "df")
+
+                             # Performing ordinal encoding
+                             self._ordinal_encoding(ord_list)
+                             self._display_msg(msg="Updated dataset sample after performing ordinal encoding:",
+                                               data=self.data,
+                                               progress_bar=self.progress_bar)
+
+                     if target_encode_ind:
+                         tar_list = encoding_list.get("TargetEncodingList", None)
+                         if not tar_list:
+                             self._display_msg(inline_msg="No information provided for the customized target encoding technique.",
+                                               progress_bar=self.progress_bar)
+                         else:
+                             # Performing target encoding
+                             self._target_encoding(tar_list)
+                             self._display_msg(msg="Updated dataset sample after performing target encoding:",
+                                               data=self.data,
+                                               progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No input provided for performing customized categorical encoding. "
+                                              "AutoML will proceed with the default encoding technique.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="AutoML will proceed with the default encoding technique.",
+                               progress_bar=self.progress_bar)
+
+         # Performing default encoding on remaining categorical columns
+         self._encoding_categorical_columns()
+
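Editor's note: a sketch of the full `CategoricalEncodingParam` block this method inspects; each indicator gates its matching list, and any remaining categorical columns fall through to the default encoding. Column names are hypothetical.

    custom_data["CategoricalEncodingParam"] = {
        "OneHotEncodingIndicator": True,
        "OneHotEncodingList": ["gender", "region"],
        "OrdinalEncodingIndicator": True,
        "OrdinalEncodingList": ["education_level"],
        "TargetEncodingIndicator": False
    }
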
+     def _numapply_transformation(self, target_col, transform_val):
+         """
+         DESCRIPTION:
+             Function to perform numerical transformations using NumApply on numerical features based on user input.
+
+         PARAMETERS:
+             target_col:
+                 Required Argument.
+                 Specifies the feature on which the transformation is applied.
+                 Types: str
+
+             transform_val:
+                 Required Argument.
+                 Specifies the parameters required for applying the transformation.
+                 Types: dict
+
+         RETURNS:
+             DataFrame containing transformed data.
+
+         """
+         # Fetching columns for accumulation
+         accumulate_columns = self._extract_list(self.data.columns, [target_col])
+         apply_method = transform_val["apply_method"]
+         # Adding fit parameters for performing transformation
+         fit_params = {
+             "data" : self.data,
+             "target_columns" : target_col,
+             "apply_method" : apply_method,
+             "inplace" : True,
+             "persist" : True,
+             "accumulate" : accumulate_columns
+         }
+         # Adding additional fit parameters in case of sigmoid transformation
+         if apply_method == "sigmoid":
+             sigmoid_style = transform_val["sigmoid_style"]
+             fit_params = {**fit_params, "sigmoid_style" : sigmoid_style}
+         # Performing transformation on target columns
+         transform_output = NumApply(**fit_params).result
+         # Adding the table containing transformed data to the garbage collector
+         GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+         return transform_output
+
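Editor's note: the two `transform_val` shapes this helper accepts; only the sigmoid branch reads an extra key. The `"logit"` style value is an assumption.

    transform_val_sigmoid = {"apply_method": "sigmoid", "sigmoid_style": "logit"}
    transform_val_sininv = {"apply_method": "sininv"}
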
+     def _numerical_transformation(self, target_columns, num_transform_data):
+         """
+         DESCRIPTION:
+             Function to perform numerical transformations using Fit and Transform on numerical features based on user input.
+
+         PARAMETERS:
+             target_columns:
+                 Required Argument.
+                 Specifies the features on which transformations are applied.
+                 Types: list of strings (str)
+
+             num_transform_data:
+                 Required Argument.
+                 Specifies the teradataml DataFrame containing per-column transformation details.
+                 Types: teradataml DataFrame
+
+         """
+         # Adding fit parameters for transformation
+         fit_params = {
+             "data" : self.data,
+             "object" : num_transform_data,
+             "object_order_column" : "TargetColumn"
+         }
+         # Performing fit with all arguments.
+         num_fit_obj = Fit(**fit_params)
+         # Fetching all numerical columns
+         numerical_columns = [col for col, d_type in self.data._column_names_and_types if d_type in ["int", "float"]]
+         # Extracting id columns, i.e., numerical columns the transformation should leave untouched
+         id_columns = self._extract_list(numerical_columns, target_columns)
+         # Storing fit object and id column list for numerical transformation in data transform dictionary
+         self.data_transform_dict['custom_numerical_transformation_fit_object'] = num_fit_obj.result
+         self.data_transform_dict['custom_numerical_transformation_id_columns'] = id_columns
+         # Adding transform parameters for transformation
+         transform_params = {
+             "data" : self.data,
+             "object" : num_fit_obj.result,
+             "id_columns" : id_columns,
+             "persist" : True
+         }
+         # Performing transformation on target columns
+         self.data = Transform(**transform_params).result
+         # Adding the table containing transformed data to the garbage collector
+         GarbageCollector._add_to_garbagecollector(self.data._table_name)
+         self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
+                           data=self.data,
+                           progress_bar=self.progress_bar)
+
+     def _mathematical_transformation(self):
+         """
+         DESCRIPTION:
+             Function to perform different mathematical transformations (i.e., log, pow,
+             exp, sininv, sigmoid) on numerical features based on user input.
+         """
+         self._display_msg(msg="\nStarting customized mathematical transformation ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+         mat_transform_input = self.custom_data.get("MathameticalTransformationIndicator", False)
+         # Checking user input for mathematical transformations
+         if mat_transform_input:
+             # Extracting the details required for mathematical transformations
+             mat_transform_list = self.custom_data.get("MathameticalTransformationParam", None)
+             if mat_transform_list:
+                 # Checking whether the columns are present in the dataset
+                 _Validators._validate_dataframe_has_argument_columns(list(mat_transform_list.keys()),
+                                                                      "MathameticalTransformationParam", self.data, "df")
+
+                 # Lists for storing target columns and mathematical transformation information
+                 transform_data = []
+                 target_columns = []
+                 # Storing custom mathematical transformation indicator in data transform dictionary
+                 self.data_transform_dict['custom_mathematical_transformation_ind'] = True
+                 # Storing custom numapply transformation parameters in data transform dictionary
+                 self.data_transform_dict['custom_numapply_transformation_param'] = {}
+
+                 for col, transform_val in mat_transform_list.items():
+                     apply_method = transform_val["apply_method"]
+                     if apply_method in ["sininv", "sigmoid"]:
+                         # Applying NumApply transformation
+                         self.data = self._numapply_transformation(col, transform_val)
+                         self._display_msg(msg="Updated dataset sample after applying numapply transformation:",
+                                           data=self.data,
+                                           progress_bar=self.progress_bar)
+                         # Updating parameter details for each column
+                         self.data_transform_dict['custom_numapply_transformation_param'].update({col : transform_val})
+                     else:
+                         # Handling specific scenarios for log and pow transformations
+                         parameters = ""
+                         if apply_method == "log":
+                             base = transform_val["base"]
+                             parameters = json.dumps({"base" : base})
+                         elif apply_method == "pow":
+                             exponent = transform_val["exponent"]
+                             parameters = json.dumps({"exponent" : exponent})
+                         target_columns.append(col)
+                         transform_data.append({"TargetColumn" : col, "DefaultValue" : 1, "Transformation" : apply_method, "Parameters" : parameters})
+                 # Checking for transformation data
+                 if len(transform_data):
+                     # Converting into a pandas and then a teradataml DataFrame for performing further operations
+                     transform_data = pd.DataFrame(transform_data, columns=["TargetColumn", "DefaultValue", "Transformation", "Parameters"])
+                     self._display_msg(msg="Numerical transformation information:",
+                                       data=transform_data,
+                                       progress_bar=self.progress_bar)
+                     copy_to_sql(df=transform_data, table_name="automl_num_transform_data", temporary=True)
+                     num_transform_data = DataFrame.from_table("automl_num_transform_data")
+                     # Applying transformation using Fit/Transform functions
+                     self._numerical_transformation(target_columns, num_transform_data)
+                     # Storing custom numerical transformation parameters and column list in data transform dictionary
+                     self.data_transform_dict['custom_numerical_transformation_col'] = target_columns
+                     self.data_transform_dict['custom_numerical_transformation_params'] = num_transform_data
+             else:
+                 self._display_msg(inline_msg="No input provided for performing customized mathematical transformation.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized mathematical transformation.",
+                               progress_bar=self.progress_bar)
+
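Editor's note: a sketch of `MathameticalTransformationParam` (the key is spelled this way in the source). `log` and `pow` entries are routed through the Fit/Transform table built above, while `sininv`/`sigmoid` go through NumApply. Column names and values are hypothetical.

    custom_data["MathameticalTransformationParam"] = {
        "income": {"apply_method": "log", "base": 10},
        "tenure": {"apply_method": "pow", "exponent": 2},
        "balance": {"apply_method": "sigmoid", "sigmoid_style": "logit"}
    }
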
+     def _non_linear_transformation(self):
+         """
+         DESCRIPTION:
+             Function to perform customized non-linear transformation on numerical features based on user input.
+
+         """
+         self._display_msg(msg="\nStarting customized non-linear transformation ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         nl_transform_input = self.custom_data.get("NonLinearTransformationIndicator", False)
+         # Checking user input for non-linear transformation
+         if nl_transform_input:
+             # Extracting the details required for non-linear transformation
+             nl_transform_list = self.custom_data.get("NonLinearTransformationParam", None)
+             if nl_transform_list:
+                 total_combination = len(nl_transform_list)
+                 # Generating all possible combination names
+                 possible_combination = ["Combination_" + str(counter) for counter in range(1, total_combination + 1)]
+                 self._display_msg(msg="Possible combinations:",
+                                   col_lst=possible_combination,
+                                   progress_bar=self.progress_bar)
+                 # Storing custom non-linear transformation indicator in data transform dictionary
+                 self.data_transform_dict['custom_non_linear_transformation_ind'] = True
+                 # Storing custom non-linear transformation fit objects in data transform dictionary
+                 self.data_transform_dict['custom_non_linear_transformation_fit_object'] = {}
+                 # Performing transformation for each combination
+                 for comb, transform_val in nl_transform_list.items():
+                     if comb in possible_combination:
+                         target_columns = transform_val["target_columns"]
+                         # Checking whether the columns are present in the dataset
+                         _Validators._validate_dataframe_has_argument_columns(target_columns,
+                                                                              "target_columns", self.data, "df")
+
+                         formula = transform_val["formula"]
+                         result_column = transform_val["result_column"]
+                         # Adding fit params for transformation
+                         fit_param = {
+                             "data" : self.data,
+                             "target_columns" : target_columns,
+                             "formula" : formula,
+                             "result_column" : result_column
+                         }
+                         # Performing fit on dataset
+                         fit_obj = NonLinearCombineFit(**fit_param)
+                         # Updating the fit object for each non-linear combination
+                         self.data_transform_dict['custom_non_linear_transformation_fit_object'].update({comb : fit_obj})
+                         # Adding transform params for transformation
+                         transform_params = {
+                             "data" : self.data,
+                             "object" : fit_obj,
+                             "accumulate" : self.data.columns,
+                             "persist" : True
+                         }
+                         self.data = NonLinearCombineTransform(**transform_params).result
+                         # Adding the table containing transformed data to the garbage collector
+                         GarbageCollector._add_to_garbagecollector(self.data._table_name)
+                     else:
+                         self._display_msg(inline_msg="Combination names do not match the expected 'Combination_<n>' naming.",
+                                           progress_bar=self.progress_bar)
+                 self._display_msg(msg="Updated dataset sample after performing non-linear transformation:",
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No information provided for performing customized non-linear transformation.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized non-linear transformation.",
+                               progress_bar=self.progress_bar)
+
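Editor's note: a sketch of `NonLinearTransformationParam`; entries must be named `Combination_1`, `Combination_2`, ... to pass the check above. The `X0`/`X1` formula notation assumes NonLinearCombineFit maps them to `target_columns` in order; column names are hypothetical.

    custom_data["NonLinearTransformationParam"] = {
        "Combination_1": {
            "target_columns": ["height", "weight"],
            "formula": "Y = X1/(X0*X0)",
            "result_column": "bmi"
        }
    }
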
+     def _anti_select_columns(self):
+         """
+         DESCRIPTION:
+             Function to remove specific features from the dataset based on user input.
+
+         """
+         self._display_msg(msg="\nStarting customized anti-select columns ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         anti_select_input = self.custom_data.get("AntiselectIndicator", False)
+         # Checking user input for anti-select columns
+         if anti_select_input:
+             # Extracting the column list required for anti-select
+             anti_select_list = self.custom_data.get("AntiselectParam", None)
+             if anti_select_list:
+                 if all(item in self.data.columns for item in anti_select_list):
+                     # Storing custom anti-select columns indicator and column list in data transform dictionary
+                     self.data_transform_dict['custom_anti_select_columns_ind'] = True
+                     self.data_transform_dict['custom_anti_select_columns'] = anti_select_list
+                     fit_params = {
+                         "data" : self.data,
+                         "exclude" : anti_select_list
+                     }
+                     # Performing transformation for the given user input
+                     self.data = Antiselect(**fit_params).result
+                     self._display_msg(msg="Updated dataset sample after performing anti-select columns:",
+                                       data=self.data,
+                                       progress_bar=self.progress_bar)
+                 else:
+                     self._display_msg(msg="Columns provided in the list are not present in the dataset:",
+                                       col_lst=anti_select_list,
+                                       progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No information provided for performing the anti-select columns operation.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized anti-select columns.",
+                               progress_bar=self.progress_bar)
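Editor's note: `AntiselectParam` is a plain list of columns to drop, and every listed column must exist in the dataset or the step is skipped with a warning, as coded above. Column names are hypothetical.

    custom_data["AntiselectIndicator"] = True
    custom_data["AntiselectParam"] = ["row_id", "customer_name"]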