teradataml 17.20.0.7-py3-none-any.whl → 20.0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (1303)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1935 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2040 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +798 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1683 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +1011 -0
  31. teradataml/automl/data_transformation.py +789 -0
  32. teradataml/automl/feature_engineering.py +1580 -0
  33. teradataml/automl/feature_exploration.py +554 -0
  34. teradataml/automl/model_evaluation.py +151 -0
  35. teradataml/automl/model_training.py +1026 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/auth_client.py +133 -0
  41. teradataml/clients/pkce_client.py +481 -481
  42. teradataml/common/aed_utils.py +7 -2
  43. teradataml/common/bulk_exposed_utils.py +111 -111
  44. teradataml/common/constants.py +1438 -1441
  45. teradataml/common/deprecations.py +160 -0
  46. teradataml/common/exceptions.py +73 -73
  47. teradataml/common/formula.py +742 -742
  48. teradataml/common/garbagecollector.py +597 -635
  49. teradataml/common/messagecodes.py +424 -431
  50. teradataml/common/messages.py +228 -231
  51. teradataml/common/sqlbundle.py +693 -693
  52. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  53. teradataml/common/utils.py +2424 -2500
  54. teradataml/common/warnings.py +25 -25
  55. teradataml/common/wrapper_utils.py +1 -110
  56. teradataml/config/dummy_file1.cfg +4 -4
  57. teradataml/config/dummy_file2.cfg +2 -2
  58. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  59. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  60. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  61. teradataml/context/aed_context.py +217 -217
  62. teradataml/context/context.py +1091 -999
  63. teradataml/data/A_loan.csv +19 -19
  64. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  65. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  66. teradataml/data/B_loan.csv +49 -49
  67. teradataml/data/BuoyData2.csv +17 -17
  68. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  69. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  70. teradataml/data/Convolve2RealsLeft.csv +5 -5
  71. teradataml/data/Convolve2RealsRight.csv +5 -5
  72. teradataml/data/Convolve2ValidLeft.csv +11 -11
  73. teradataml/data/Convolve2ValidRight.csv +11 -11
  74. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  75. teradataml/data/Orders1_12mf.csv +24 -24
  76. teradataml/data/Pi_loan.csv +7 -7
  77. teradataml/data/SMOOTHED_DATA.csv +7 -7
  78. teradataml/data/TestDFFT8.csv +9 -9
  79. teradataml/data/TestRiver.csv +109 -109
  80. teradataml/data/Traindata.csv +28 -28
  81. teradataml/data/acf.csv +17 -17
  82. teradataml/data/adaboost_example.json +34 -34
  83. teradataml/data/adaboostpredict_example.json +24 -24
  84. teradataml/data/additional_table.csv +10 -10
  85. teradataml/data/admissions_test.csv +21 -21
  86. teradataml/data/admissions_train.csv +41 -41
  87. teradataml/data/admissions_train_nulls.csv +41 -41
  88. teradataml/data/advertising.csv +201 -0
  89. teradataml/data/ageandheight.csv +13 -13
  90. teradataml/data/ageandpressure.csv +31 -31
  91. teradataml/data/antiselect_example.json +36 -36
  92. teradataml/data/antiselect_input.csv +8 -8
  93. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  94. teradataml/data/applicant_external.csv +6 -6
  95. teradataml/data/applicant_reference.csv +6 -6
  96. teradataml/data/arima_example.json +9 -9
  97. teradataml/data/assortedtext_input.csv +8 -8
  98. teradataml/data/attribution_example.json +33 -33
  99. teradataml/data/attribution_sample_table.csv +27 -27
  100. teradataml/data/attribution_sample_table1.csv +6 -6
  101. teradataml/data/attribution_sample_table2.csv +11 -11
  102. teradataml/data/bank_churn.csv +10001 -0
  103. teradataml/data/bank_marketing.csv +11163 -0
  104. teradataml/data/bank_web_clicks1.csv +42 -42
  105. teradataml/data/bank_web_clicks2.csv +91 -91
  106. teradataml/data/bank_web_url.csv +85 -85
  107. teradataml/data/barrier.csv +2 -2
  108. teradataml/data/barrier_new.csv +3 -3
  109. teradataml/data/betweenness_example.json +13 -13
  110. teradataml/data/bike_sharing.csv +732 -0
  111. teradataml/data/bin_breaks.csv +8 -8
  112. teradataml/data/bin_fit_ip.csv +3 -3
  113. teradataml/data/binary_complex_left.csv +11 -11
  114. teradataml/data/binary_complex_right.csv +11 -11
  115. teradataml/data/binary_matrix_complex_left.csv +21 -21
  116. teradataml/data/binary_matrix_complex_right.csv +21 -21
  117. teradataml/data/binary_matrix_real_left.csv +21 -21
  118. teradataml/data/binary_matrix_real_right.csv +21 -21
  119. teradataml/data/blood2ageandweight.csv +26 -26
  120. teradataml/data/bmi.csv +501 -0
  121. teradataml/data/boston.csv +507 -507
  122. teradataml/data/boston2cols.csv +721 -0
  123. teradataml/data/breast_cancer.csv +570 -0
  124. teradataml/data/buoydata_mix.csv +11 -11
  125. teradataml/data/burst_data.csv +5 -5
  126. teradataml/data/burst_example.json +20 -20
  127. teradataml/data/byom_example.json +17 -17
  128. teradataml/data/bytes_table.csv +3 -3
  129. teradataml/data/cal_housing_ex_raw.csv +70 -70
  130. teradataml/data/callers.csv +7 -7
  131. teradataml/data/calls.csv +10 -10
  132. teradataml/data/cars_hist.csv +33 -33
  133. teradataml/data/cat_table.csv +24 -24
  134. teradataml/data/ccm_example.json +31 -31
  135. teradataml/data/ccm_input.csv +91 -91
  136. teradataml/data/ccm_input2.csv +13 -13
  137. teradataml/data/ccmexample.csv +101 -101
  138. teradataml/data/ccmprepare_example.json +8 -8
  139. teradataml/data/ccmprepare_input.csv +91 -91
  140. teradataml/data/cfilter_example.json +12 -12
  141. teradataml/data/changepointdetection_example.json +18 -18
  142. teradataml/data/changepointdetectionrt_example.json +8 -8
  143. teradataml/data/chi_sq.csv +2 -2
  144. teradataml/data/churn_data.csv +14 -14
  145. teradataml/data/churn_emission.csv +35 -35
  146. teradataml/data/churn_initial.csv +3 -3
  147. teradataml/data/churn_state_transition.csv +5 -5
  148. teradataml/data/citedges_2.csv +745 -745
  149. teradataml/data/citvertices_2.csv +1210 -1210
  150. teradataml/data/clicks2.csv +16 -16
  151. teradataml/data/clickstream.csv +12 -12
  152. teradataml/data/clickstream1.csv +11 -11
  153. teradataml/data/closeness_example.json +15 -15
  154. teradataml/data/complaints.csv +21 -21
  155. teradataml/data/complaints_mini.csv +3 -3
  156. teradataml/data/complaints_testtoken.csv +224 -224
  157. teradataml/data/complaints_tokens_test.csv +353 -353
  158. teradataml/data/complaints_traintoken.csv +472 -472
  159. teradataml/data/computers_category.csv +1001 -1001
  160. teradataml/data/computers_test1.csv +1252 -1252
  161. teradataml/data/computers_train1.csv +5009 -5009
  162. teradataml/data/computers_train1_clustered.csv +5009 -5009
  163. teradataml/data/confusionmatrix_example.json +9 -9
  164. teradataml/data/conversion_event_table.csv +3 -3
  165. teradataml/data/corr_input.csv +17 -17
  166. teradataml/data/correlation_example.json +11 -11
  167. teradataml/data/coxhazardratio_example.json +39 -39
  168. teradataml/data/coxph_example.json +15 -15
  169. teradataml/data/coxsurvival_example.json +28 -28
  170. teradataml/data/cpt.csv +41 -41
  171. teradataml/data/credit_ex_merged.csv +45 -45
  172. teradataml/data/customer_loyalty.csv +301 -301
  173. teradataml/data/customer_loyalty_newseq.csv +31 -31
  174. teradataml/data/customer_segmentation_test.csv +2628 -0
  175. teradataml/data/customer_segmentation_train.csv +8069 -0
  176. teradataml/data/dataframe_example.json +146 -146
  177. teradataml/data/decisionforest_example.json +37 -37
  178. teradataml/data/decisionforestpredict_example.json +38 -38
  179. teradataml/data/decisiontree_example.json +21 -21
  180. teradataml/data/decisiontreepredict_example.json +45 -45
  181. teradataml/data/dfft2_size4_real.csv +17 -17
  182. teradataml/data/dfft2_test_matrix16.csv +17 -17
  183. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  184. teradataml/data/diabetes.csv +443 -443
  185. teradataml/data/diabetes_test.csv +89 -89
  186. teradataml/data/dict_table.csv +5 -5
  187. teradataml/data/docperterm_table.csv +4 -4
  188. teradataml/data/docs/__init__.py +1 -1
  189. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  190. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  191. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  192. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  193. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  194. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  195. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  196. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  197. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  198. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  199. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  200. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  201. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  202. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  203. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  204. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  205. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  206. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  207. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  208. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  209. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  210. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  211. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  212. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  213. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  214. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  215. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  216. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  217. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
  218. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
  219. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  220. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
  221. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  222. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  223. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  224. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  225. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  226. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  227. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  228. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  229. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  230. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  231. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  232. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  233. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  234. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  235. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  236. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  237. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  238. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  239. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  240. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  241. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  242. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
  243. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  244. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  245. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  246. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  247. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  248. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  249. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  250. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  251. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
  252. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  253. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  254. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  255. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  256. teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
  257. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  258. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  259. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
  260. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  261. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  262. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  263. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
  264. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  265. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  266. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  267. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
  268. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  269. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  270. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  271. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  272. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  273. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  274. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  275. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  276. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
  277. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  278. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  279. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  280. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  281. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
  282. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
  283. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  284. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  285. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  286. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  287. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  288. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  289. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  290. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  291. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
  292. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  293. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  294. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  295. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  296. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  297. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  298. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  299. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  300. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
  301. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  302. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
  303. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
  304. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  305. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  306. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  307. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  308. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  309. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  310. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  311. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  312. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
  313. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  314. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  315. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  316. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  317. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  318. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  319. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  320. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  321. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  322. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  323. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  324. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
  325. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
  326. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
  327. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  328. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  329. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  330. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  331. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  332. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  333. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  334. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  335. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  336. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  337. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  338. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  339. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  340. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  341. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  342. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  343. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  344. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  345. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  346. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  347. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  348. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  349. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  350. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  351. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  352. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  353. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  354. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  355. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  356. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  357. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  358. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  359. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  360. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  361. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  362. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  363. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  364. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  365. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  366. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  367. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  368. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  369. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  370. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  371. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  372. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  373. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  374. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  375. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  376. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  377. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  378. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  379. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  380. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  381. teradataml/data/dtw_example.json +17 -17
  382. teradataml/data/dtw_t1.csv +11 -11
  383. teradataml/data/dtw_t2.csv +4 -4
  384. teradataml/data/dwt2d_example.json +15 -15
  385. teradataml/data/dwt_example.json +14 -14
  386. teradataml/data/dwt_filter_dim.csv +5 -5
  387. teradataml/data/emission.csv +9 -9
  388. teradataml/data/emp_table_by_dept.csv +19 -19
  389. teradataml/data/employee_info.csv +4 -4
  390. teradataml/data/employee_table.csv +6 -6
  391. teradataml/data/excluding_event_table.csv +2 -2
  392. teradataml/data/finance_data.csv +6 -6
  393. teradataml/data/finance_data2.csv +61 -61
  394. teradataml/data/finance_data3.csv +93 -93
  395. teradataml/data/fish.csv +160 -0
  396. teradataml/data/fm_blood2ageandweight.csv +26 -26
  397. teradataml/data/fmeasure_example.json +11 -11
  398. teradataml/data/followers_leaders.csv +10 -10
  399. teradataml/data/fpgrowth_example.json +12 -12
  400. teradataml/data/frequentpaths_example.json +29 -29
  401. teradataml/data/friends.csv +9 -9
  402. teradataml/data/fs_input.csv +33 -33
  403. teradataml/data/fs_input1.csv +33 -33
  404. teradataml/data/genData.csv +513 -513
  405. teradataml/data/geodataframe_example.json +39 -39
  406. teradataml/data/glass_types.csv +215 -0
  407. teradataml/data/glm_admissions_model.csv +12 -12
  408. teradataml/data/glm_example.json +56 -29
  409. teradataml/data/glml1l2_example.json +28 -28
  410. teradataml/data/glml1l2predict_example.json +54 -54
  411. teradataml/data/glmpredict_example.json +54 -54
  412. teradataml/data/gq_t1.csv +21 -21
  413. teradataml/data/hconvolve_complex_right.csv +5 -5
  414. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  415. teradataml/data/histogram_example.json +11 -11
  416. teradataml/data/hmmdecoder_example.json +78 -78
  417. teradataml/data/hmmevaluator_example.json +24 -24
  418. teradataml/data/hmmsupervised_example.json +10 -10
  419. teradataml/data/hmmunsupervised_example.json +7 -7
  420. teradataml/data/house_values.csv +12 -12
  421. teradataml/data/house_values2.csv +13 -13
  422. teradataml/data/housing_cat.csv +7 -7
  423. teradataml/data/housing_data.csv +9 -9
  424. teradataml/data/housing_test.csv +47 -47
  425. teradataml/data/housing_test_binary.csv +47 -47
  426. teradataml/data/housing_train.csv +493 -493
  427. teradataml/data/housing_train_attribute.csv +4 -4
  428. teradataml/data/housing_train_binary.csv +437 -437
  429. teradataml/data/housing_train_parameter.csv +2 -2
  430. teradataml/data/housing_train_response.csv +493 -493
  431. teradataml/data/housing_train_segment.csv +201 -0
  432. teradataml/data/ibm_stock.csv +370 -370
  433. teradataml/data/ibm_stock1.csv +370 -370
  434. teradataml/data/identitymatch_example.json +21 -21
  435. teradataml/data/idf_table.csv +4 -4
  436. teradataml/data/impressions.csv +101 -101
  437. teradataml/data/inflation.csv +21 -21
  438. teradataml/data/initial.csv +3 -3
  439. teradataml/data/insect2Cols.csv +61 -0
  440. teradataml/data/insect_sprays.csv +12 -12
  441. teradataml/data/insurance.csv +1339 -1339
  442. teradataml/data/interpolator_example.json +12 -12
  443. teradataml/data/iris_altinput.csv +481 -481
  444. teradataml/data/iris_attribute_output.csv +8 -8
  445. teradataml/data/iris_attribute_test.csv +121 -121
  446. teradataml/data/iris_attribute_train.csv +481 -481
  447. teradataml/data/iris_category_expect_predict.csv +31 -31
  448. teradataml/data/iris_data.csv +151 -0
  449. teradataml/data/iris_input.csv +151 -151
  450. teradataml/data/iris_response_train.csv +121 -121
  451. teradataml/data/iris_test.csv +31 -31
  452. teradataml/data/iris_train.csv +121 -121
  453. teradataml/data/join_table1.csv +4 -4
  454. teradataml/data/join_table2.csv +4 -4
  455. teradataml/data/jsons/anly_function_name.json +6 -6
  456. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  457. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  458. teradataml/data/jsons/byom/h2opredict.json +194 -194
  459. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  460. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  461. teradataml/data/jsons/paired_functions.json +435 -435
  462. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  463. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  464. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  465. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  466. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  467. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  468. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  469. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  470. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  471. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  472. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  473. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  474. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  475. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  476. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  477. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  478. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  479. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  480. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  481. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  482. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  483. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  484. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  485. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  486. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  487. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  488. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  489. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  490. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  491. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  492. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  493. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  494. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  495. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  496. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  497. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  498. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  499. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  500. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  501. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  502. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  503. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  504. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  505. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  506. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  507. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  508. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  509. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  510. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  511. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  512. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  513. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  514. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  515. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  516. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  517. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  518. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  519. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  520. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  521. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  522. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  523. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  524. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  525. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  526. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  527. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  528. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  529. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  531. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  532. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  533. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  534. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  535. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  536. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  537. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  539. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  540. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  541. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  542. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  543. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  544. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  545. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  546. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  547. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  548. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  549. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  550. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  551. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  552. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  553. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  554. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  555. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  556. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  557. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  558. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  559. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  560. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  561. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  562. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  563. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  564. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  565. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  566. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  567. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  568. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
  569. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  570. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  571. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  572. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  573. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  574. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  575. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  576. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  577. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  578. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  579. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
  580. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  581. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  582. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  583. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
  584. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
  585. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  586. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  587. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
  588. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  589. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  590. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  591. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
  592. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  593. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  594. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  595. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
  596. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  597. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  598. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  599. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  600. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  601. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  602. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  603. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  604. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  605. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  606. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  607. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  608. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  609. teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
  610. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  611. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  612. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  613. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  614. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  615. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  616. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  617. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  618. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
  619. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
  620. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
  621. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  622. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  623. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  624. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  625. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  626. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  627. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  628. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  629. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  630. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  631. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  632. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  633. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  634. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  635. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
  636. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
  637. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
  638. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  639. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  640. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  641. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  642. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  643. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  644. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  645. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  646. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  647. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  648. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  649. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  650. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  651. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  653. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  654. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  655. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  656. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  657. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  658. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  659. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  660. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  661. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  662. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  663. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  664. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  665. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  666. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  667. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  668. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  669. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  670. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  671. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  672. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  673. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  674. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  675. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  676. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  677. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  678. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  679. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  680. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  681. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  682. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  683. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  684. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  685. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  686. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  687. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  688. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  689. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  690. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  691. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  692. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  693. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  694. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  695. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  696. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  697. teradataml/data/kmeans_example.json +22 -17
  698. teradataml/data/kmeans_table.csv +10 -0
  699. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  700. teradataml/data/knn_example.json +18 -18
  701. teradataml/data/knnrecommender_example.json +6 -6
  702. teradataml/data/knnrecommenderpredict_example.json +12 -12
  703. teradataml/data/lar_example.json +17 -17
  704. teradataml/data/larpredict_example.json +30 -30
  705. teradataml/data/lc_new_predictors.csv +5 -5
  706. teradataml/data/lc_new_reference.csv +9 -9
  707. teradataml/data/lda_example.json +8 -8
  708. teradataml/data/ldainference_example.json +14 -14
  709. teradataml/data/ldatopicsummary_example.json +8 -8
  710. teradataml/data/levendist_input.csv +13 -13
  711. teradataml/data/levenshteindistance_example.json +10 -10
  712. teradataml/data/linreg_example.json +9 -9
  713. teradataml/data/load_example_data.py +326 -323
  714. teradataml/data/loan_prediction.csv +295 -295
  715. teradataml/data/lungcancer.csv +138 -138
  716. teradataml/data/mappingdata.csv +12 -12
  717. teradataml/data/milk_timeseries.csv +157 -157
  718. teradataml/data/min_max_titanic.csv +4 -4
  719. teradataml/data/minhash_example.json +6 -6
  720. teradataml/data/ml_ratings.csv +7547 -7547
  721. teradataml/data/ml_ratings_10.csv +2445 -2445
  722. teradataml/data/model1_table.csv +5 -5
  723. teradataml/data/model2_table.csv +5 -5
  724. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  725. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  726. teradataml/data/modularity_example.json +12 -12
  727. teradataml/data/movavg_example.json +7 -7
  728. teradataml/data/mtx1.csv +7 -7
  729. teradataml/data/mtx2.csv +13 -13
  730. teradataml/data/multi_model_classification.csv +401 -0
  731. teradataml/data/multi_model_regression.csv +401 -0
  732. teradataml/data/mvdfft8.csv +9 -9
  733. teradataml/data/naivebayes_example.json +9 -9
  734. teradataml/data/naivebayespredict_example.json +19 -19
  735. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  736. teradataml/data/naivebayestextclassifier_example.json +8 -8
  737. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  738. teradataml/data/name_Find_configure.csv +10 -10
  739. teradataml/data/namedentityfinder_example.json +14 -14
  740. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  741. teradataml/data/namedentityfindertrainer_example.json +6 -6
  742. teradataml/data/nb_iris_input_test.csv +31 -31
  743. teradataml/data/nb_iris_input_train.csv +121 -121
  744. teradataml/data/nbp_iris_model.csv +13 -13
  745. teradataml/data/ner_extractor_text.csv +2 -2
  746. teradataml/data/ner_sports_test2.csv +29 -29
  747. teradataml/data/ner_sports_train.csv +501 -501
  748. teradataml/data/nerevaluator_example.json +5 -5
  749. teradataml/data/nerextractor_example.json +18 -18
  750. teradataml/data/nermem_sports_test.csv +17 -17
  751. teradataml/data/nermem_sports_train.csv +50 -50
  752. teradataml/data/nertrainer_example.json +6 -6
  753. teradataml/data/ngrams_example.json +6 -6
  754. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  755. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  756. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  757. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  758. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  759. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  760. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  761. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  762. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  763. teradataml/data/npath_example.json +23 -23
  764. teradataml/data/ntree_example.json +14 -14
  765. teradataml/data/numeric_strings.csv +4 -4
  766. teradataml/data/numerics.csv +4 -4
  767. teradataml/data/ocean_buoy.csv +17 -17
  768. teradataml/data/ocean_buoy2.csv +17 -17
  769. teradataml/data/ocean_buoys.csv +27 -27
  770. teradataml/data/ocean_buoys2.csv +10 -10
  771. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  772. teradataml/data/ocean_buoys_seq.csv +29 -29
  773. teradataml/data/onehot_encoder_train.csv +4 -0
  774. teradataml/data/openml_example.json +92 -0
  775. teradataml/data/optional_event_table.csv +4 -4
  776. teradataml/data/orders1.csv +11 -11
  777. teradataml/data/orders1_12.csv +12 -12
  778. teradataml/data/orders_ex.csv +4 -4
  779. teradataml/data/pack_example.json +8 -8
  780. teradataml/data/package_tracking.csv +19 -19
  781. teradataml/data/package_tracking_pti.csv +18 -18
  782. teradataml/data/pagerank_example.json +13 -13
  783. teradataml/data/paragraphs_input.csv +6 -6
  784. teradataml/data/pathanalyzer_example.json +7 -7
  785. teradataml/data/pathgenerator_example.json +7 -7
  786. teradataml/data/phrases.csv +7 -7
  787. teradataml/data/pivot_example.json +8 -8
  788. teradataml/data/pivot_input.csv +22 -22
  789. teradataml/data/playerRating.csv +31 -31
  790. teradataml/data/postagger_example.json +6 -6
  791. teradataml/data/posttagger_output.csv +44 -44
  792. teradataml/data/production_data.csv +16 -16
  793. teradataml/data/production_data2.csv +7 -7
  794. teradataml/data/randomsample_example.json +31 -31
  795. teradataml/data/randomwalksample_example.json +8 -8
  796. teradataml/data/rank_table.csv +6 -6
  797. teradataml/data/ref_mobile_data.csv +4 -4
  798. teradataml/data/ref_mobile_data_dense.csv +2 -2
  799. teradataml/data/ref_url.csv +17 -17
  800. teradataml/data/restaurant_reviews.csv +7 -7
  801. teradataml/data/river_data.csv +145 -145
  802. teradataml/data/roc_example.json +7 -7
  803. teradataml/data/roc_input.csv +101 -101
  804. teradataml/data/rule_inputs.csv +6 -6
  805. teradataml/data/rule_table.csv +2 -2
  806. teradataml/data/sales.csv +7 -7
  807. teradataml/data/sales_transaction.csv +501 -501
  808. teradataml/data/salesdata.csv +342 -342
  809. teradataml/data/sample_cities.csv +2 -2
  810. teradataml/data/sample_shapes.csv +10 -10
  811. teradataml/data/sample_streets.csv +2 -2
  812. teradataml/data/sampling_example.json +15 -15
  813. teradataml/data/sax_example.json +8 -8
  814. teradataml/data/scale_attributes.csv +3 -0
  815. teradataml/data/scale_example.json +74 -23
  816. teradataml/data/scale_housing.csv +11 -11
  817. teradataml/data/scale_housing_test.csv +6 -6
  818. teradataml/data/scale_input_part_sparse.csv +31 -0
  819. teradataml/data/scale_input_partitioned.csv +16 -0
  820. teradataml/data/scale_input_sparse.csv +11 -0
  821. teradataml/data/scale_parameters.csv +3 -0
  822. teradataml/data/scale_stat.csv +11 -11
  823. teradataml/data/scalebypartition_example.json +13 -13
  824. teradataml/data/scalemap_example.json +13 -13
  825. teradataml/data/scalesummary_example.json +12 -12
  826. teradataml/data/score_category.csv +101 -101
  827. teradataml/data/score_summary.csv +4 -4
  828. teradataml/data/script_example.json +9 -9
  829. teradataml/data/scripts/deploy_script.py +84 -0
  830. teradataml/data/scripts/mapper.R +20 -0
  831. teradataml/data/scripts/mapper.py +15 -15
  832. teradataml/data/scripts/mapper_replace.py +15 -15
  833. teradataml/data/scripts/sklearn/__init__.py +0 -0
  834. teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
  835. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
  836. teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
  837. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
  838. teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
  839. teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
  840. teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
  841. teradataml/data/seeds.csv +10 -10
  842. teradataml/data/sentenceextractor_example.json +6 -6
  843. teradataml/data/sentiment_extract_input.csv +11 -11
  844. teradataml/data/sentiment_train.csv +16 -16
  845. teradataml/data/sentiment_word.csv +20 -20
  846. teradataml/data/sentiment_word_input.csv +19 -19
  847. teradataml/data/sentimentextractor_example.json +24 -24
  848. teradataml/data/sentimenttrainer_example.json +8 -8
  849. teradataml/data/sequence_table.csv +10 -10
  850. teradataml/data/seriessplitter_example.json +7 -7
  851. teradataml/data/sessionize_example.json +17 -17
  852. teradataml/data/sessionize_table.csv +116 -116
  853. teradataml/data/setop_test1.csv +24 -24
  854. teradataml/data/setop_test2.csv +22 -22
  855. teradataml/data/soc_nw_edges.csv +10 -10
  856. teradataml/data/soc_nw_vertices.csv +7 -7
  857. teradataml/data/souvenir_timeseries.csv +167 -167
  858. teradataml/data/sparse_iris_attribute.csv +5 -5
  859. teradataml/data/sparse_iris_test.csv +121 -121
  860. teradataml/data/sparse_iris_train.csv +601 -601
  861. teradataml/data/star1.csv +6 -6
  862. teradataml/data/state_transition.csv +5 -5
  863. teradataml/data/stock_data.csv +53 -53
  864. teradataml/data/stock_movement.csv +11 -11
  865. teradataml/data/stock_vol.csv +76 -76
  866. teradataml/data/stop_words.csv +8 -8
  867. teradataml/data/store_sales.csv +37 -37
  868. teradataml/data/stringsimilarity_example.json +7 -7
  869. teradataml/data/strsimilarity_input.csv +13 -13
  870. teradataml/data/students.csv +101 -101
  871. teradataml/data/svm_iris_input_test.csv +121 -121
  872. teradataml/data/svm_iris_input_train.csv +481 -481
  873. teradataml/data/svm_iris_model.csv +7 -7
  874. teradataml/data/svmdense_example.json +9 -9
  875. teradataml/data/svmdensepredict_example.json +18 -18
  876. teradataml/data/svmsparse_example.json +7 -7
  877. teradataml/data/svmsparsepredict_example.json +13 -13
  878. teradataml/data/svmsparsesummary_example.json +7 -7
  879. teradataml/data/target_mobile_data.csv +13 -13
  880. teradataml/data/target_mobile_data_dense.csv +5 -5
  881. teradataml/data/templatedata.csv +1201 -1201
  882. teradataml/data/templates/open_source_ml.json +9 -0
  883. teradataml/data/teradataml_example.json +150 -1
  884. teradataml/data/test_classification.csv +101 -0
  885. teradataml/data/test_loan_prediction.csv +53 -53
  886. teradataml/data/test_pacf_12.csv +37 -37
  887. teradataml/data/test_prediction.csv +101 -0
  888. teradataml/data/test_regression.csv +101 -0
  889. teradataml/data/test_river2.csv +109 -109
  890. teradataml/data/text_inputs.csv +6 -6
  891. teradataml/data/textchunker_example.json +7 -7
  892. teradataml/data/textclassifier_example.json +6 -6
  893. teradataml/data/textclassifier_input.csv +7 -7
  894. teradataml/data/textclassifiertrainer_example.json +6 -6
  895. teradataml/data/textmorph_example.json +5 -5
  896. teradataml/data/textparser_example.json +15 -15
  897. teradataml/data/texttagger_example.json +11 -11
  898. teradataml/data/texttokenizer_example.json +6 -6
  899. teradataml/data/texttrainer_input.csv +11 -11
  900. teradataml/data/tf_example.json +6 -6
  901. teradataml/data/tfidf_example.json +13 -13
  902. teradataml/data/tfidf_input1.csv +201 -201
  903. teradataml/data/tfidf_train.csv +6 -6
  904. teradataml/data/time_table1.csv +535 -535
  905. teradataml/data/time_table2.csv +14 -14
  906. teradataml/data/timeseriesdata.csv +1601 -1601
  907. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  908. teradataml/data/titanic.csv +892 -892
  909. teradataml/data/token_table.csv +696 -696
  910. teradataml/data/train_multiclass.csv +101 -0
  911. teradataml/data/train_regression.csv +101 -0
  912. teradataml/data/train_regression_multiple_labels.csv +101 -0
  913. teradataml/data/train_tracking.csv +27 -27
  914. teradataml/data/transformation_table.csv +5 -5
  915. teradataml/data/transformation_table_new.csv +1 -1
  916. teradataml/data/tv_spots.csv +16 -16
  917. teradataml/data/twod_climate_data.csv +117 -117
  918. teradataml/data/uaf_example.json +475 -475
  919. teradataml/data/univariatestatistics_example.json +8 -8
  920. teradataml/data/unpack_example.json +9 -9
  921. teradataml/data/unpivot_example.json +9 -9
  922. teradataml/data/unpivot_input.csv +8 -8
  923. teradataml/data/us_air_pass.csv +36 -36
  924. teradataml/data/us_population.csv +624 -624
  925. teradataml/data/us_states_shapes.csv +52 -52
  926. teradataml/data/varmax_example.json +17 -17
  927. teradataml/data/vectordistance_example.json +25 -25
  928. teradataml/data/ville_climatedata.csv +121 -121
  929. teradataml/data/ville_tempdata.csv +12 -12
  930. teradataml/data/ville_tempdata1.csv +12 -12
  931. teradataml/data/ville_temperature.csv +11 -11
  932. teradataml/data/waveletTable.csv +1605 -1605
  933. teradataml/data/waveletTable2.csv +1605 -1605
  934. teradataml/data/weightedmovavg_example.json +8 -8
  935. teradataml/data/wft_testing.csv +5 -5
  936. teradataml/data/wine_data.csv +1600 -0
  937. teradataml/data/word_embed_input_table1.csv +5 -5
  938. teradataml/data/word_embed_input_table2.csv +4 -4
  939. teradataml/data/word_embed_model.csv +22 -22
  940. teradataml/data/words_input.csv +13 -13
  941. teradataml/data/xconvolve_complex_left.csv +6 -6
  942. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  943. teradataml/data/xgboost_example.json +35 -35
  944. teradataml/data/xgboostpredict_example.json +31 -31
  945. teradataml/data/ztest_example.json +16 -0
  946. teradataml/dataframe/copy_to.py +1769 -1698
  947. teradataml/dataframe/data_transfer.py +2812 -2745
  948. teradataml/dataframe/dataframe.py +17630 -16946
  949. teradataml/dataframe/dataframe_utils.py +1875 -1740
  950. teradataml/dataframe/fastload.py +794 -603
  951. teradataml/dataframe/indexer.py +424 -424
  952. teradataml/dataframe/setop.py +1179 -1166
  953. teradataml/dataframe/sql.py +10174 -6432
  954. teradataml/dataframe/sql_function_parameters.py +439 -388
  955. teradataml/dataframe/sql_functions.py +652 -652
  956. teradataml/dataframe/sql_interfaces.py +220 -220
  957. teradataml/dataframe/vantage_function_types.py +674 -630
  958. teradataml/dataframe/window.py +693 -692
  959. teradataml/dbutils/__init__.py +3 -3
  960. teradataml/dbutils/dbutils.py +1167 -1150
  961. teradataml/dbutils/filemgr.py +267 -267
  962. teradataml/gen_ai/__init__.py +2 -2
  963. teradataml/gen_ai/convAI.py +472 -472
  964. teradataml/geospatial/__init__.py +3 -3
  965. teradataml/geospatial/geodataframe.py +1105 -1094
  966. teradataml/geospatial/geodataframecolumn.py +392 -387
  967. teradataml/geospatial/geometry_types.py +925 -925
  968. teradataml/hyperparameter_tuner/__init__.py +1 -1
  969. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  970. teradataml/hyperparameter_tuner/utils.py +281 -187
  971. teradataml/lib/aed_0_1.dll +0 -0
  972. teradataml/lib/libaed_0_1.dylib +0 -0
  973. teradataml/lib/libaed_0_1.so +0 -0
  974. teradataml/libaed_0_1.dylib +0 -0
  975. teradataml/libaed_0_1.so +0 -0
  976. teradataml/opensource/__init__.py +1 -0
  977. teradataml/opensource/sklearn/__init__.py +1 -0
  978. teradataml/opensource/sklearn/_class.py +255 -0
  979. teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
  980. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  981. teradataml/opensource/sklearn/constants.py +54 -0
  982. teradataml/options/__init__.py +130 -124
  983. teradataml/options/configure.py +358 -336
  984. teradataml/options/display.py +176 -176
  985. teradataml/plot/__init__.py +2 -2
  986. teradataml/plot/axis.py +1388 -1388
  987. teradataml/plot/constants.py +15 -15
  988. teradataml/plot/figure.py +398 -398
  989. teradataml/plot/plot.py +760 -760
  990. teradataml/plot/query_generator.py +83 -83
  991. teradataml/plot/subplot.py +216 -216
  992. teradataml/scriptmgmt/UserEnv.py +3791 -3761
  993. teradataml/scriptmgmt/__init__.py +3 -3
  994. teradataml/scriptmgmt/lls_utils.py +1719 -1604
  995. teradataml/series/series.py +532 -532
  996. teradataml/series/series_utils.py +71 -71
  997. teradataml/table_operators/Apply.py +949 -917
  998. teradataml/table_operators/Script.py +1718 -1982
  999. teradataml/table_operators/TableOperator.py +1255 -1616
  1000. teradataml/table_operators/__init__.py +2 -3
  1001. teradataml/table_operators/apply_query_generator.py +262 -262
  1002. teradataml/table_operators/query_generator.py +507 -507
  1003. teradataml/table_operators/table_operator_query_generator.py +460 -460
  1004. teradataml/table_operators/table_operator_util.py +631 -639
  1005. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  1006. teradataml/table_operators/templates/dataframe_map.template +176 -176
  1007. teradataml/table_operators/templates/script_executor.template +170 -170
  1008. teradataml/utils/dtypes.py +684 -684
  1009. teradataml/utils/internal_buffer.py +84 -84
  1010. teradataml/utils/print_versions.py +205 -205
  1011. teradataml/utils/utils.py +410 -410
  1012. teradataml/utils/validators.py +2277 -2115
  1013. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
  1014. teradataml-20.0.0.1.dist-info/RECORD +1056 -0
  1015. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
  1016. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
  1017. teradataml/analytics/mle/AdaBoost.py +0 -651
  1018. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1019. teradataml/analytics/mle/Antiselect.py +0 -342
  1020. teradataml/analytics/mle/Arima.py +0 -641
  1021. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1022. teradataml/analytics/mle/Attribution.py +0 -1070
  1023. teradataml/analytics/mle/Betweenness.py +0 -658
  1024. teradataml/analytics/mle/Burst.py +0 -711
  1025. teradataml/analytics/mle/CCM.py +0 -600
  1026. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1027. teradataml/analytics/mle/CFilter.py +0 -460
  1028. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1029. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1030. teradataml/analytics/mle/Closeness.py +0 -737
  1031. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1032. teradataml/analytics/mle/Correlation.py +0 -477
  1033. teradataml/analytics/mle/Correlation2.py +0 -573
  1034. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1035. teradataml/analytics/mle/CoxPH.py +0 -556
  1036. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1037. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1038. teradataml/analytics/mle/DTW.py +0 -623
  1039. teradataml/analytics/mle/DWT.py +0 -564
  1040. teradataml/analytics/mle/DWT2D.py +0 -599
  1041. teradataml/analytics/mle/DecisionForest.py +0 -716
  1042. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1043. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1044. teradataml/analytics/mle/DecisionTree.py +0 -830
  1045. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1046. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1047. teradataml/analytics/mle/FMeasure.py +0 -402
  1048. teradataml/analytics/mle/FPGrowth.py +0 -734
  1049. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1050. teradataml/analytics/mle/GLM.py +0 -558
  1051. teradataml/analytics/mle/GLML1L2.py +0 -547
  1052. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1053. teradataml/analytics/mle/GLMPredict.py +0 -529
  1054. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1055. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1056. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1057. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1058. teradataml/analytics/mle/Histogram.py +0 -561
  1059. teradataml/analytics/mle/IDWT.py +0 -476
  1060. teradataml/analytics/mle/IDWT2D.py +0 -493
  1061. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1062. teradataml/analytics/mle/Interpolator.py +0 -918
  1063. teradataml/analytics/mle/KMeans.py +0 -485
  1064. teradataml/analytics/mle/KNN.py +0 -627
  1065. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1066. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1067. teradataml/analytics/mle/LAR.py +0 -439
  1068. teradataml/analytics/mle/LARPredict.py +0 -478
  1069. teradataml/analytics/mle/LDA.py +0 -548
  1070. teradataml/analytics/mle/LDAInference.py +0 -492
  1071. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1072. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1073. teradataml/analytics/mle/LinReg.py +0 -433
  1074. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1075. teradataml/analytics/mle/MinHash.py +0 -544
  1076. teradataml/analytics/mle/Modularity.py +0 -587
  1077. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1078. teradataml/analytics/mle/NERExtractor.py +0 -595
  1079. teradataml/analytics/mle/NERTrainer.py +0 -458
  1080. teradataml/analytics/mle/NGrams.py +0 -570
  1081. teradataml/analytics/mle/NPath.py +0 -634
  1082. teradataml/analytics/mle/NTree.py +0 -549
  1083. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1084. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1085. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1086. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1087. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1088. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1089. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1090. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1091. teradataml/analytics/mle/POSTagger.py +0 -417
  1092. teradataml/analytics/mle/Pack.py +0 -411
  1093. teradataml/analytics/mle/PageRank.py +0 -535
  1094. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1095. teradataml/analytics/mle/PathGenerator.py +0 -367
  1096. teradataml/analytics/mle/PathStart.py +0 -464
  1097. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1098. teradataml/analytics/mle/Pivot.py +0 -471
  1099. teradataml/analytics/mle/ROC.py +0 -425
  1100. teradataml/analytics/mle/RandomSample.py +0 -637
  1101. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1102. teradataml/analytics/mle/SAX.py +0 -779
  1103. teradataml/analytics/mle/SVMDense.py +0 -677
  1104. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1105. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1106. teradataml/analytics/mle/SVMSparse.py +0 -557
  1107. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1108. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1109. teradataml/analytics/mle/Sampling.py +0 -549
  1110. teradataml/analytics/mle/Scale.py +0 -565
  1111. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1112. teradataml/analytics/mle/ScaleMap.py +0 -378
  1113. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1114. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1115. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1116. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1117. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1118. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1119. teradataml/analytics/mle/Sessionize.py +0 -475
  1120. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1121. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1122. teradataml/analytics/mle/TF.py +0 -389
  1123. teradataml/analytics/mle/TFIDF.py +0 -504
  1124. teradataml/analytics/mle/TextChunker.py +0 -414
  1125. teradataml/analytics/mle/TextClassifier.py +0 -399
  1126. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1127. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1128. teradataml/analytics/mle/TextMorph.py +0 -494
  1129. teradataml/analytics/mle/TextParser.py +0 -623
  1130. teradataml/analytics/mle/TextTagger.py +0 -530
  1131. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1132. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1133. teradataml/analytics/mle/Unpack.py +0 -526
  1134. teradataml/analytics/mle/Unpivot.py +0 -438
  1135. teradataml/analytics/mle/VarMax.py +0 -776
  1136. teradataml/analytics/mle/VectorDistance.py +0 -762
  1137. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1138. teradataml/analytics/mle/XGBoost.py +0 -842
  1139. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1140. teradataml/analytics/mle/__init__.py +0 -123
  1141. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1142. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1143. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1144. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1145. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1146. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1147. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1148. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1149. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1150. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1151. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1152. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1153. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1154. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1155. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1156. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1157. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1158. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1159. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1160. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1161. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1162. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1163. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1164. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1165. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1166. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1167. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1168. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1169. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1170. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1171. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1172. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1173. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1174. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1175. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1176. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1177. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1178. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1179. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1180. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1181. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1182. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1183. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1184. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1185. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1186. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1187. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1188. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1189. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1190. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1191. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1192. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1193. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1194. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1195. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1196. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1197. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1198. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1199. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1200. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1201. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1202. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1203. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1204. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1205. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1206. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1207. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1208. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1209. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1210. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1211. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1212. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1213. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1214. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1215. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1216. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1217. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1218. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1219. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1220. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1221. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1222. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1223. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1224. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1225. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1226. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1227. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1228. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1229. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1230. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1231. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1232. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1233. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1234. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1235. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1236. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1237. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1238. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1239. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1240. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1241. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1242. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1243. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1244. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1245. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1246. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1247. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1248. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1249. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1250. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1251. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1252. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1253. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1254. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1255. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1256. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1257. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1258. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1259. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1260. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1261. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1262. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1263. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1264. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1265. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1266. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1267. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1268. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1269. teradataml/analytics/sqle/Antiselect.py +0 -321
  1270. teradataml/analytics/sqle/Attribution.py +0 -603
  1271. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1272. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1273. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1274. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1275. teradataml/analytics/sqle/NPath.py +0 -632
  1276. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1277. teradataml/analytics/sqle/Pack.py +0 -388
  1278. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1279. teradataml/analytics/sqle/Sessionize.py +0 -390
  1280. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1281. teradataml/analytics/sqle/Unpack.py +0 -503
  1282. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1283. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1284. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1285. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1286. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1287. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1288. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1289. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1290. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1291. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1292. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1293. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1294. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1295. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1296. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1297. teradataml/catalog/model_cataloging.py +0 -980
  1298. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1299. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1300. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1301. teradataml/table_operators/sandbox_container_util.py +0 -643
  1302. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1303. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1683 @@
1
+ # ##################################################################
2
+ #
3
+ # Copyright 2024 Teradata. All rights reserved.
4
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
5
+ #
6
+ # Primary Owner: Sweta Shaw
7
+ # Email Id: Sweta.Shaw@Teradata.com
8
+ #
9
+ # Secondary Owner: Akhil Bisht
10
+ # Email Id: AKHIL.BISHT@Teradata.com
11
+ #
12
+ # Version: 1.1
13
+ # Function Version: 1.0
14
+ # ##################################################################
15
+
16
+ # Python libraries
17
+ import json
18
+ import numpy as np
19
+ from sklearn.metrics import confusion_matrix
20
+ import time
21
+
22
+ # Teradata libraries
23
+ from teradataml.dataframe.copy_to import copy_to_sql
24
+ from teradataml import ColumnExpression
25
+ from teradataml.dataframe.dataframe import DataFrame
26
+ from teradataml.utils.validators import _Validators
27
+ from teradataml import ROC
28
+ from teradataml.common.utils import UtilFuncs
29
+ from teradataml.utils.dtypes import _Dtypes
30
+ from teradataml.common.utils import UtilFuncs
31
+ from teradataml import TeradataMlException
32
+ from teradataml.common.messages import Messages, MessageCodes
33
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband
34
+
35
+ # AutoML Internal libraries
36
+ from teradataml.automl.data_preparation import _DataPreparation
37
+ from teradataml.automl.feature_engineering import _FeatureEngineering
38
+ from teradataml.automl.feature_exploration import _FeatureExplore, _is_terminal
39
+ from teradataml.automl.model_evaluation import _ModelEvaluator
40
+ from teradataml.automl.model_training import _ModelTraining
41
+ from teradataml.automl.data_transformation import _DataTransformation
42
+ from teradataml.automl.custom_json_utils import _GenerateCustomJson
43
+
44
+
45
+ class AutoML:
46
+
47
+ def __init__(self,
48
+ task_type = "Default",
49
+ include = None,
50
+ exclude = None,
51
+ verbose = 0,
52
+ max_runtime_secs = None,
53
+ stopping_metric = None,
54
+ stopping_tolerance = None,
55
+ max_models = None,
56
+ custom_config_file = None):
57
+ """
58
+ DESCRIPTION:
59
+ AutoML (Automated Machine Learning) is an approach that automates the process
60
+ of building, training, and validating machine learning models. It involves
61
+ various algorithms to automate various aspects of the machine learning workflow,
62
+            various algorithms to automate different aspects of the machine learning workflow,
63
+ tuning, and model deployment. It aims to simplify the process of building
64
+ machine learning models, by automating some of the more time-consuming
65
+ and labor-intensive tasks involved in the process.
66
+
67
+ AutoML is designed to handle both regression and classification (binary and
68
+            multiclass) tasks. User can specify the task type, that is, whether to apply a
69
+ regression OR classification algorithm on the provided dataset. By default, AutoML
70
+ decides the task type.
71
+
72
+ AutoML by default, trains using all model algorithms applicable for the
73
+ task type problem. For example, "glm" and "svm" does not support multi-class
74
+            task type problem. For example, "glm" and "svm" do not support multi-class
75
+ of multi-class classification problem, by default. While for regression and
76
+ binary classification problem, all 5 models i.e., "glm", "svm", "knn",
77
+ "decision_forest", "xgboost" are available to train by default.
78
+
79
+ AutoML provides functionality to use specific model algorithms for training.
80
+ User can provide either include or exclude model. In case of include,
81
+ only specified models are trained while for exclude, all models except
82
+ specified model are trained.
83
+
84
+ AutoML also provides an option to customize the processes within feature
85
+ engineering, data preparation and model training phases. User can customize
86
+ the processes by passing the JSON file path in case of custom run. It also
87
+ supports early stopping of model training based on stopping metrics,
88
+ maximum running time and maximum models to be trained.
89
+
90
+ PARAMETERS:
91
+ task_type:
92
+ Optional Argument.
93
+ Specifies the task type for AutoML, whether to apply regression OR classification
94
+ on the provided dataset. If user wants AutoML to decide the task type automatically,
95
+ then it should be set to "Default".
96
+ Default Value: "Default"
97
+ Permitted Values: "Regression", "Classification", "Default"
98
+ Types: str
99
+
100
+ include:
101
+ Optional Argument.
102
+ Specifies the model algorithms to be used for model training phase.
103
+ By default, all 5 models are used for training for regression and binary
104
+ classification problem, while only 3 models are used for multi-class.
105
+ Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
106
+ Types: str OR list of str
107
+
108
+
109
+ exclude:
110
+ Optional Argument.
111
+ Specifies the model algorithms to be excluded from model training phase.
112
+ No model is excluded by default.
113
+ Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
114
+ Types: str OR list of str
115
+
116
+ verbose:
117
+ Optional Argument.
118
+ Specifies the detailed execution steps based on verbose level.
119
+ Default Value: 0
120
+ Permitted Values:
121
+ * 0: prints the progress bar and leaderboard
122
+ * 1: prints the execution steps of AutoML.
123
+ * 2: prints the intermediate data between the execution of each step of AutoML.
124
+ Types: int
125
+
126
+ max_runtime_secs:
127
+ Optional Argument.
128
+ Specifies the time limit in seconds for model training.
129
+ Types: int
130
+
131
+ stopping_metric:
132
+ Required, when "stopping_tolerance" is set, otherwise optional.
133
+ Specifies the stopping metrics for stopping tolerance in model training.
134
+ Permitted Values:
135
+ * For task_type "Regression": "R2", "MAE", "MSE", "MSLE",
136
+ "RMSE", "RMSLE"
137
+ * For task_type "Classification": 'MICRO-F1','MACRO-F1',
138
+ 'MICRO-RECALL','MACRO-RECALL',
139
+ 'MICRO-PRECISION', 'MACRO-PRECISION',
140
+ 'WEIGHTED-PRECISION','WEIGHTED-RECALL',
141
+ 'WEIGHTED-F1', 'ACCURACY'
142
+ Types: str
143
+
144
+ stopping_tolerance:
145
+ Required, when "stopping_metric" is set, otherwise optional.
146
+ Specifies the stopping tolerance for stopping metrics in model training.
147
+ Types: float
148
+
149
+ max_models:
150
+ Optional Argument.
151
+ Specifies the maximum number of models to be trained.
152
+ Types: int
153
+
154
+ custom_config_file:
155
+ Optional Argument.
156
+ Specifies the path of JSON file in case of custom run.
157
+ Types: str
158
+
159
+ RETURNS:
160
+ Instance of AutoML.
161
+
162
+ RAISES:
163
+ TeradataMlException, TypeError, ValueError
164
+
165
+ EXAMPLES:
166
+ # Notes:
167
+ # 1. Get the connection to Vantage to execute the function.
168
+ # 2. One must import the required functions mentioned in
169
+ # the example from teradataml.
170
+ # 3. Function raises error if not supported on the Vantage
171
+ # user is connected to.
172
+
173
+ # Load the example data.
174
+ >>> load_example_data("GLMPredict", ["admissions_test", "admissions_train"])
175
+ >>> load_example_data("decisionforestpredict", ["housing_train", "housing_test"])
176
+ >>> load_example_data("teradataml", "iris_input")
177
+
178
+ # Create teradataml DataFrames.
179
+ >>> admissions_train = DataFrame.from_table("admissions_train")
180
+ >>> admissions_test = DataFrame.from_table("admissions_test")
181
+ >>> housing_train = DataFrame.from_table("housing_train")
182
+ >>> housing_test = DataFrame.from_table("housing_test")
183
+ >>> iris_input = DataFrame.from_table("iris_input")
184
+
185
+ # Example 1: Run AutoML for classification problem.
186
+ # Scenario: Predict whether a student will be admitted to a university
187
+ # based on different factors. Run AutoML to get the best
188
+ # performing model out of available models.
189
+
190
+ # Create an instance of AutoML.
191
+ >>> automl_obj = AutoML(task_type="Classification")
192
+
193
+ # Fit the data.
194
+ >>> automl_obj.fit(admissions_train, "admitted")
195
+
196
+ # Run predict with best performing model.
197
+ >>> prediction = automl_obj.predict()
198
+ >>> prediction
199
+
200
+ # Run predict for new test data with best performing model.
201
+ >>> prediction = automl_obj.predict(admissions_test)
202
+ >>> prediction
203
+
204
+ # Run predict for new test data with second best performing model.
205
+ >>> prediction = automl_obj.predict(admissions_test, rank=2)
206
+ >>> prediction
207
+
208
+ # Display leaderboard.
209
+ >>> automl_obj.leaderboard()
210
+
211
+ # Display best performing model.
212
+ >>> automl_obj.leader()
213
+
214
+ # Example 2 : Run AutoML for regression problem.
215
+ # Scenario : Predict the price of house based on different factors.
216
+ # Run AutoML to get the best performing model using custom
217
+ # configuration file to customize different processes of
218
+            #             AutoML Run. Use include to specify "xgboost" and
219
+            #             "decision_forest" models to be used for training.
220
+
221
+ # Generate custom JSON file
222
+ >>> AutoML.generate_custom_config("custom_housing")
223
+
224
+ # Create instance of AutoML.
225
+ >>> automl_obj = AutoML(task_type="Regression",
226
+ >>> verbose=1,
227
+ >>> include=["decision_forest", "xgboost"],
228
+ >>> custom_config_file="custom_housing.json")
229
+ # Fit the data.
230
+ >>> automl_obj.fit(housing_train, "price")
231
+
232
+ # Run predict with best performing model.
233
+ >>> prediction = automl_obj.predict()
234
+ >>> prediction
235
+
236
+ # Run predict for new test data with best performing model.
237
+ >>> prediction = automl_obj.predict(housing_test)
238
+ >>> prediction
239
+
240
+ # Run predict for new test data with second best performing model.
241
+ >>> prediction = automl_obj.predict(housing_test, rank=2)
242
+ >>> prediction
243
+
244
+ # Display leaderboard.
245
+ >>> automl_obj.leaderboard()
246
+
247
+ # Display best performing model.
248
+ >>> automl_obj.leader()
249
+
250
+ # Example 3 : Run AutoML for multiclass classification problem.
251
+ # Scenario : Predict the species of iris flower based on different
252
+ # factors. Use custom configuration file to customize
253
+ # different processes of AutoML Run to get the best
254
+ # performing model out of available models.
255
+
256
+ # Generate custom JSON file
257
+ >>> AutoML.generate_custom_config()
258
+
259
+ # Create instance of AutoML.
260
+ >>> automl_obj = AutoML(verbose=2,
261
+ >>> exclude="xgboost",
262
+ >>> custom_config_file="custom.json")
263
+ # Fit the data.
264
+ >>> automl_obj.fit(iris_input, iris_input.species)
265
+
266
+ # Run predict with best performing model.
267
+ >>> prediction = automl_obj.predict()
268
+ >>> prediction
269
+
270
+ # Run predict with second best performing model.
271
+ >>> prediction = automl_obj.predict(rank=2)
272
+ >>> prediction
273
+
274
+ # Display leaderboard.
275
+ >>> automl_obj.leaderboard()
276
+
277
+ # Display best performing model.
278
+ >>> automl_obj.leader()
279
+
280
+ # Example 4 : Run AutoML for regression problem with early stopping metric and tolerance.
281
+ # Scenario : Predict the price of house based on different factors.
282
+ # Use custom configuration file to customize different
283
+ # processes of AutoML Run. Define performance threshold
284
+ # to acquire for the available models, and terminate training
285
+ # upon meeting the stipulated performance criteria.
286
+
287
+ # Generate custom JSON file
288
+ >>> AutoML.generate_custom_config("custom_housing")
289
+
290
+ # Create instance of AutoML.
291
+ >>> automl_obj = AutoML(verbose=2,
292
+ >>> exclude="xgboost",
293
+ >>> stopping_metric="R2",
294
+ >>> stopping_tolerance=0.7,
295
+ >>> max_models=10,
296
+ >>> custom_config_file="custom_housing.json")
297
+ # Fit the data.
298
+ >>> automl_obj.fit(housing_train, "price")
299
+
300
+ # Run predict with best performing model.
301
+ >>> prediction = automl_obj.predict()
302
+ >>> prediction
303
+
304
+ # Display leaderboard.
305
+ >>> automl_obj.leaderboard()
306
+
307
+ # Example 5 : Run AutoML for regression problem with maximum runtime.
308
+ # Scenario : Predict the species of iris flower based on different factors.
309
+ # Run AutoML to get the best performing model in specified time.
310
+
311
+ # Create instance of AutoML.
312
+ >>> automl_obj = AutoML(verbose=2,
313
+ >>> exclude="xgboost",
314
+ >>> max_runtime_secs=500,
315
+ >>> max_models=3)
316
+ # Fit the data.
317
+ >>> automl_obj.fit(iris_input, iris_input.species)
318
+
319
+ # Run predict with best performing model.
320
+ >>> prediction = automl_obj.predict()
321
+ >>> prediction
322
+
323
+ # Run predict with second best performing model.
324
+ >>> prediction = automl_obj.predict(rank=2)
325
+ >>> prediction
326
+
327
+ # Display leaderboard.
328
+ >>> automl_obj.leaderboard()
329
+
330
+ # Display best performing model.
331
+ >>> automl_obj.leader()
332
+ """
333
+ # Appending arguments to list for validation
334
+ arg_info_matrix = []
335
+ arg_info_matrix.append(["task_type", task_type, True, (str), True, ["Regression", "Classification", "Default"]])
336
+ arg_info_matrix.append(["include", include, True, (str, list), True, ["glm", "svm", "knn",
337
+ "decision_forest", "xgboost"]])
338
+ arg_info_matrix.append(["exclude", exclude, True, (str, list), True, ["glm", "svm", "knn",
339
+ "decision_forest", "xgboost"]])
340
+ arg_info_matrix.append(["verbose", verbose, True, (int), True, [0,1,2]])
341
+ arg_info_matrix.append(["max_runtime_secs", max_runtime_secs, True, (int, float)])
342
+ arg_info_matrix.append(["stopping_metric", stopping_metric, True, (str), True, ["R2", 'MAE',
343
+ 'MSE', 'MSLE',
344
+ 'RMSE', 'RMSLE',
345
+ 'MICRO-F1','MACRO-F1',
346
+ 'MICRO-RECALL','MACRO-RECALL',
347
+ 'MICRO-PRECISION', 'MACRO-PRECISION',
348
+ 'WEIGHTED-PRECISION','WEIGHTED-RECALL',
349
+ 'WEIGHTED-F1', 'ACCURACY']])
350
+ arg_info_matrix.append(["stopping_tolerance", stopping_tolerance, True, (float, int)])
351
+ arg_info_matrix.append(["max_models", max_models, True, (int)])
352
+ arg_info_matrix.append(["custom_config_file", custom_config_file, True, (str), True])
353
+
354
+
355
+ # Validate argument types
356
+ _Validators._validate_function_arguments(arg_info_matrix)
357
+ # Either include or exclude can be used.
358
+ if include is not None or exclude is not None:
359
+ _Validators._validate_mutually_exclusive_arguments(include, "include", exclude, "exclude")
360
+ # Validate mutually inclusive arguments
361
+ _Validators._validate_mutually_inclusive_arguments(stopping_metric, "stopping_metric", stopping_tolerance, "stopping_tolerance")
362
+ # Validate lower range for max_models
363
+ _Validators._validate_argument_range(max_models, "max_models", lbound=1, lbound_inclusive=True)
364
+
365
+ custom_data = None
366
+ self.auto = True
367
+ # Validate custom file
368
+ if custom_config_file:
369
+ # Performing validation
370
+ _Validators._validate_file_exists(custom_config_file)
371
+ _Validators._validate_file_extension(custom_config_file, "json")
372
+ _Validators._check_empty_file(custom_config_file)
373
+ # Setting auto to False
374
+ self.auto = False
375
+ # Loading file
376
+ with open(custom_config_file, 'r') as json_file:
377
+ custom_data = json.load(json_file)
378
+
379
+ # Initializing class variables
380
+ self.data = None
381
+ self.target_column = None
382
+ self.custom_data = custom_data
383
+ self.task_type = task_type
384
+ self.include_model = include
385
+ self.exclude_model = exclude
386
+ self.verbose = verbose
387
+ self.max_runtime_secs = max_runtime_secs
388
+ self.stopping_metric = stopping_metric
389
+ self.stopping_tolerance = stopping_tolerance
390
+ self.max_models = max_models
391
+ self.model_list = ['decision_forest', 'xgboost', 'knn', 'svm', 'glm']
392
+ self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
393
+ self._is_fit_called = False
394
+
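A minimal sketch of a constrained run using the constructor documented above, assuming a connected Vantage session and the example tables loaded as in the EXAMPLES section:

from teradataml import AutoML, DataFrame, load_example_data

load_example_data("decisionforestpredict", ["housing_train"])
housing_train = DataFrame.from_table("housing_train")

# Train only two model families and stop once an R2 of 0.7 is reached,
# capping the search at five models.
aml = AutoML(task_type="Regression",
             include=["decision_forest", "xgboost"],
             stopping_metric="R2",
             stopping_tolerance=0.7,
             max_models=5)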
395
+ @collect_queryband(queryband="AutoML_fit")
396
+ def fit(self,
397
+ data,
398
+ target_column):
399
+ """
400
+ DESCRIPTION:
401
+ Function triggers the AutoML run. It is designed to handle both
402
+ regression and classification tasks depending on the specified "task_type".
403
+
404
+ PARAMETERS:
405
+ data:
406
+ Required Argument.
407
+ Specifies the input teradataml DataFrame.
408
+ Types: teradataml Dataframe
409
+
410
+ target_column:
411
+ Required Argument.
412
+ Specifies target column of dataset.
413
+ Types: str or ColumnExpression
414
+
415
+ RETURNS:
416
+ None
417
+
418
+ RAISES:
419
+ TeradataMlException, TypeError, ValueError
420
+
421
+ EXAMPLES:
422
+ # Create an instance of the AutoML called "automl_obj"
423
+ # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
424
+ # Perform fit() operation on the "automl_obj".
425
+
426
+ # Example 1: Passing column expression for target column.
427
+            >>> automl_obj.fit(data = housing_train, target_column = housing_train.price)
428
+
429
+ # Example 2: Passing name of target column.
430
+            >>> automl_obj.fit(data = housing_train, target_column = "price")
431
+ """
432
+
433
+ self._is_fit_called = True
434
+ # Checking if target column is of type ColumnExpression
435
+ if isinstance(target_column, ColumnExpression):
436
+ target_column = target_column.name
437
+
438
+ # Appending fit arguments to list for validation
439
+ arg_info_fit_matrix = []
440
+ arg_info_fit_matrix.append(["data", data, False, (DataFrame), True])
441
+ arg_info_fit_matrix.append(["target_column", target_column, False, (str), True])
442
+
443
+ # Validate argument types
444
+ _Validators._validate_function_arguments(arg_info_fit_matrix)
445
+
446
+ # Initializing class variables
447
+ self.data = data
448
+ self.target_column = target_column
449
+
450
+ # Checking if include model list is present
451
+ if self.include_model:
452
+ # Converting to list if passed as string
453
+ self.include_model = UtilFuncs._as_list(self.include_model)
454
+ # Updating model list based on include list
455
+ self.model_list = list(set(self.include_model))
456
+ self.model_list = [model.lower() for model in self.model_list]
457
+
458
+ # Checking if exclude model list is present
459
+ if self.exclude_model:
460
+ # Converting to list if passed as string
461
+ self.exclude_model = UtilFuncs._as_list(self.exclude_model)
462
+ # Updating model list based on exclude list
463
+ self.model_list = list(set(self.model_list) - set(self.exclude_model))
464
+ self.model_list = [model.lower() for model in self.model_list]
465
+
466
+ # Checking if target column is present in data
467
+ _Validators._validate_dataframe_has_argument_columns(self.target_column, "target_column", self.data, "df")
468
+
469
+ # Handling default task type
470
+ if self.task_type.casefold() == "default":
471
+            # If the target column has 20 or fewer distinct values, the task type is
472
+            # mapped to classification; otherwise it is mapped to regression.
473
+ if self.data.drop_duplicate(self.target_column).size <= 20:
474
+                print("\nTask type is set to Classification as the target column "
475
+                      "has 20 or fewer distinct values.")
476
+ self.task_type = "Classification"
477
+ else:
478
+                print("\nTask type is set to Regression as the target column "
479
+                      "has more than 20 distinct values.")
480
+ self.task_type = "Regression"
481
+
482
+ if self.is_classification_type():
483
+ if self.stopping_metric is not None:
484
+ permitted_values = ["MICRO-F1", "MACRO-F1",
485
+ "MICRO-RECALL", "MACRO-RECALL",
486
+ "MICRO-PRECISION", "MACRO-PRECISION",
487
+ "WEIGHTED-PRECISION", "WEIGHTED-RECALL",
488
+ "WEIGHTED-F1", "ACCURACY"]
489
+ _Validators._validate_permitted_values(self.stopping_metric, permitted_values, "stopping_metric")
490
+ else:
491
+ if self.stopping_metric is not None:
492
+ permitted_values = ["R2", 'MAE', 'MSE', 'MSLE','RMSE', 'RMSLE']
493
+ _Validators._validate_permitted_values(self.stopping_metric, permitted_values, "stopping_metric")
494
+
495
+ if not self.is_classification_type():
496
+ _Validators._validate_column_type(self.data, self.target_column, 'target_column',
497
+ expected_types=UtilFuncs()._get_numeric_datatypes())
498
+
499
+ # Displaying received custom input
500
+ if self.custom_data:
501
+ print("\nReceived below input for customization : ")
502
+ print(json.dumps(self.custom_data, indent=4))
503
+
504
+        # Classification problem
505
+ task_cls = _Classification
506
+ cls_method = "_classification"
507
+
508
+ # Regression problem
509
+ if self.task_type.casefold() == "regression":
510
+ task_cls = _Regression
511
+ cls_method = "_regression"
512
+
513
+ # Running AutoML
514
+ clf = task_cls(self.data, self.target_column, self.custom_data)
515
+
516
+ self.model_info, self.leader_board, self.target_count, self.target_label, \
517
+ self.data_transformation_params, self.table_name_mapping = getattr(clf, cls_method)(
518
+ model_list = self.model_list,
519
+ auto = self.auto,
520
+ verbose = self.verbose,
521
+ max_runtime_secs = self.max_runtime_secs,
522
+ stopping_metric = self.stopping_metric,
523
+ stopping_tolerance = self.stopping_tolerance,
524
+ max_models = self.max_models)
525
+
526
+ # Model Evaluation Phase
527
+ self.m_evaluator = _ModelEvaluator(self.model_info,
528
+ self.target_column,
529
+ self.task_type)
530
+
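A short sketch of the two documented ways to pass the target to fit(), assuming the aml instance and housing_train DataFrame from the sketch above:

# Target passed by column name ...
aml.fit(housing_train, "price")

# ... or as a ColumnExpression; fit() resolves it to the column name before
# validation, so both calls are equivalent.
aml.fit(housing_train, housing_train.price)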
531
+ @collect_queryband(queryband="AutoML_predict")
532
+ def predict(self,
533
+ data = None,
534
+ rank = 1):
535
+ """
536
+ DESCRIPTION:
537
+ Function generates prediction on either default test data or any other data
538
+ using model rank in leaderboard and displays performance metrics
539
+ of the specified model.
540
+
541
+ If test data contains target column, then it displays both prediction
542
+ and performance metrics, otherwise displays only prediction.
543
+
544
+ PARAMETERS:
545
+ data:
546
+ Optional Argument.
547
+ Specifies the dataset on which prediction and performance
548
+                metrics need to be generated using model rank in leaderboard.
549
+ When "data" is not specified default test data is used. Default
550
+ test data is the dataset generated at the time of training.
551
+ Types: teradataml DataFrame
552
+
553
+ rank:
554
+ Optional Argument.
555
+ Specifies the rank of the model in the leaderboard to be used for prediction.
556
+ Default Value: 1
557
+ Types: int
558
+
559
+ RETURNS:
560
+ Pandas DataFrame with predictions.
561
+
562
+ RAISES:
563
+ TeradataMlException, TypeError, ValueError
564
+
565
+ EXAMPLES:
566
+ # Create an instance of the AutoML called "automl_obj"
567
+ # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
568
+ # Perform fit() operation on the "automl_obj".
569
+ # Perform predict() operation on the "automl_obj".
570
+
571
+ # Example 1: Run predict with best performing model.
572
+ >>> prediction = automl_obj.predict()
573
+ >>> prediction
574
+
575
+ # Example 2: Run predict with second best performing model.
576
+ >>> prediction = automl_obj.predict(rank=2)
577
+ >>> prediction
578
+
579
+ # Example 3: Run predict for new test data with best performing model.
580
+ >>> prediction = automl_obj.predict(admissions_test)
581
+ >>> prediction
582
+
583
+ # Example 4: Run predict for new test data with second best performing model.
584
+ >>> prediction = automl_obj.predict(admissions_test, rank=2)
585
+ >>> prediction
586
+ """
587
+ if not self._is_fit_called:
588
+ # raise ValueError("fit() method must be called before generating prediction.")
589
+ err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
590
+ "'predict' method", \
591
+ "'fit' method must be called before" \
592
+ " running predict.")
593
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
594
+ # Appending predict arguments to list for validation.
595
+ arg_info_pred_matrix = []
596
+ arg_info_pred_matrix.append(["data", data, True, (DataFrame), True])
597
+ arg_info_pred_matrix.append(["rank", rank, True, (int), True])
598
+
599
+ # Validate argument types
600
+ _Validators._validate_function_arguments(arg_info_pred_matrix)
601
+ # Validate range for model rank
602
+ _Validators._validate_argument_range(rank, "rank", lbound=1,
603
+ ubound=self.leader_board.Rank.max(),
604
+ lbound_inclusive=True, ubound_inclusive=True)
605
+
606
+ # Setting test data indicator to default value, i.e., False.
607
+ self.test_data_ind = False
608
+ # Setting target column indicator to default value, i.e., False.
609
+ self.target_column_ind = False
610
+ # Model Evaluation using rank-1 [rank starts from 0 in leaderboard]
611
+ rank = rank-1
612
+
613
+ # Checking if there is test data provided or not.
614
+        # If no, then the model will generate prediction on default test data.
615
+ # If yes, then at first data transformation will happen then prediction will be generated.
616
+ if data is None:
617
+ metrics, pred = self.m_evaluator.model_evaluation(rank = rank,
618
+ table_name_mapping=self.table_name_mapping)
619
+ else:
620
+ # Setting test data indicator to True
621
+ self.test_data_ind = True
622
+ # Setting indicator to True if target column exists
623
+ if self.target_column in data.columns:
624
+ self.target_column_ind = True
625
+
626
+ # Data Transformation Phase
627
+ data_transform_instance = _DataTransformation(data = data,
628
+ data_transformation_params = \
629
+ self.data_transformation_params,
630
+ auto = self.auto,
631
+ verbose = self.verbose,
632
+ target_column_ind = self.target_column_ind,
633
+ table_name_mapping=self.table_name_mapping)
634
+
635
+ self.table_name_mapping = data_transform_instance.data_transformation()
636
+
637
+ # Checking for target column presence in passed test data.
638
+ # If present, then both prediction and evaluation metrics will be generated.
639
+ # If not present, then only prediction will be generated.
640
+ if self.target_column_ind:
641
+ metrics, pred = self.m_evaluator.model_evaluation(rank = rank,
642
+ test_data_ind = \
643
+ self.test_data_ind,
644
+ target_column_ind = \
645
+ self.target_column_ind,
646
+ table_name_mapping=self.table_name_mapping)
647
+ else:
648
+ pred = self.m_evaluator.model_evaluation(rank = rank,
649
+ test_data_ind = \
650
+ self.test_data_ind,
651
+ table_name_mapping=self.table_name_mapping)
652
+ # Checking if problem type is classification and target label is present.
653
+ if self.is_classification_type() and self.target_label is not None:
654
+ # Displaying target column labels
655
+ tar_dct = {}
656
+ print('\nTarget Column Mapping:')
657
+ # Iterating rows
658
+ for row in self.target_label.result.itertuples():
659
+ # Retrieving the category names of encoded target column
660
+                # row[1] contains the original name of the category
661
+ # row[2] contains the encoded value
662
+ if row[1] != 'TD_CATEGORY_COUNT':
663
+ tar_dct[row[1]] = row[2]
664
+
665
+ for key, value in tar_dct.items():
666
+ print(f"{key}: {value}")
667
+
668
+ print("\nPrediction : ")
669
+ print(pred.result)
670
+
671
+ # Showing performance metrics if there is no test data
672
+ # Or if target column is present in test data.
673
+ if not self.test_data_ind or self.target_column_ind:
674
+ print("\nPerformance Metrics : ")
675
+ print(metrics.result)
676
+
677
+ prediction_column = 'prediction' if 'prediction' in pred.result.columns else 'Prediction'
678
+
679
+ # Displaying confusion matrix and ROC-AUC for classification problem
680
+ if self.is_classification_type():
681
+ print_data = lambda data: print(data) if _is_terminal() else display(data)
682
+ # Displaying ROC-AUC for binary classification
683
+ if self.target_count == 2:
684
+ fit_params = {
685
+ "probability_column" : prediction_column,
686
+ "observation_column" : self.target_column,
687
+ "positive_class" : "1",
688
+ "data" : pred.result
689
+ }
690
+ # Fitting ROC
691
+ roc_out = ROC(**fit_params)
692
+ print("\nROC-AUC : ")
693
+ print_data(roc_out.result)
694
+ print_data(roc_out.output_data)
695
+
696
+ # Displaying confusion matrix for binary and multiclass classification
697
+ prediction_df=pred.result.to_pandas()
698
+ target_col = self.target_column
699
+ print("\nConfusion Matrix : ")
700
+ print_data(confusion_matrix(prediction_df[target_col], prediction_df[prediction_column]))
701
+
702
+ # Returning prediction
703
+ return pred.result
704
+
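A usage sketch for the rank handling above (user-facing ranks start at 1 and are shifted to the leaderboard's 0-based index internally), assuming the fitted aml object and a held-out housing_test teradataml DataFrame:

# Score the internal test split created during fit() with the leader (rank 1).
pred_leader = aml.predict()

# Score an external table with the second-ranked model; because housing_test
# contains the target column, performance metrics are printed as well.
pred_second = aml.predict(housing_test, rank=2)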
705
+ @collect_queryband(queryband="AutoML_leaderboard")
706
+ def leaderboard(self):
707
+ """
708
+ DESCRIPTION:
709
+ Function displays leaderboard.
710
+
711
+ RETURNS:
712
+ Pandas DataFrame with Leaderboard information.
713
+
714
+ RAISES:
715
+ TeradataMlException.
716
+
717
+ EXAMPLES:
718
+ # Create an instance of the AutoML called "automl_obj"
719
+ # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
720
+ # Perform fit() operation on the "automl_obj".
721
+ # Generate leaderboard using leaderboard() method on "automl_obj".
722
+ >>> automl_obj.leaderboard()
723
+ """
724
+ if not self._is_fit_called:
725
+ # raise ValueError("fit() method must be called before generating leaderboard.")
726
+ err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
727
+ "'leaderboard' method", \
728
+ "'fit' method must be called before" \
729
+ " generating leaderboard.")
730
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
731
+ return self.leader_board
732
+
733
+ @collect_queryband(queryband="AutoML_leader")
734
+ def leader(self):
735
+ """
736
+ DESCRIPTION:
737
+ Function displays best performing model.
738
+
739
+ RETURNS:
740
+ None
741
+
742
+ RAISES:
743
+ TeradataMlException.
744
+
745
+ EXAMPLES:
746
+ # Create an instance of the AutoML called "automl_obj"
747
+ # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
748
+ # Perform fit() operation on the "automl_obj".
749
+ # Generate leaderboard using leaderboard() method on "automl_obj".
750
+ # Display best performing model using leader() method on "automl_obj".
751
+ >>> automl_obj.leader()
752
+ """
753
+ if not self._is_fit_called:
754
+ # raise ValueError("fit() method must be called before generating leader.")
755
+ err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
756
+ "'leader' method", \
757
+ "'fit' method must be called before" \
758
+ " generating leader.")
759
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
760
+ record = self.leader_board
761
+ if not _is_terminal():
762
+ display(record[record['Rank'] == 1])
763
+ else:
764
+ print(record[record['Rank'] == 1])
765
+
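A small sketch tying leaderboard() and leader() together; the Rank filter on the last line is illustrative post-processing of the returned frame, not part of the API:

lb = aml.leaderboard()              # full ranking of the trained models
aml.leader()                        # prints or displays only the Rank == 1 row

# Illustrative: select the third-best model's row, using the same 'Rank'
# column that leader() filters on above.
third_best = lb[lb['Rank'] == 3]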
766
+ @staticmethod
767
+ def generate_custom_config(file_name = "custom"):
768
+ """
769
+ DESCRIPTION:
770
+ Function generates custom JSON file containing user customized input under current
771
+ working directory which can be used for AutoML execution.
772
+
773
+ PARAMETERS:
774
+ file_name:
775
+ Optional Argument.
776
+ Specifies the name of the file to be generated. Do not pass the file name
777
+ with extension. Extension '.json' is automatically added to specified file name.
778
+ Default Value: "custom"
779
+ Types: str
780
+
781
+ RETURNS:
782
+ None
783
+
784
+ EXAMPLES:
785
+ # Import either of AutoML or AutoClassifier or AutoRegressor from teradataml.
786
+ # As per requirement, generate json file using generate_custom_config() method.
787
+
788
+            # Generate a default file named "custom.json" using either of the below options.
789
+ >>> AutoML.generate_custom_config()
790
+ or
791
+ >>> AutoClassifier.generate_custom_config()
792
+ or
793
+ >>> AutoRegressor.generate_custom_config()
794
+ # The above code will generate "custom.json" file under the current working directory.
795
+
796
+ # Generate different file name using "file_name" argument.
797
+ >>> AutoML.generate_custom_config("titanic_custom")
798
+ or
799
+ >>> AutoClassifier.generate_custom_config("titanic_custom")
800
+ or
801
+ >>> AutoRegressor.generate_custom_config("housing_custom")
802
+ # The above code will generate "titanic_custom.json" file under the current working directory.
803
+
804
+ """
805
+        # Initializing class
806
+ generator = _GenerateCustomJson()
807
+ # Generating custom JSON data
808
+ data = generator._generate_custom_json()
809
+ # Converting to JSON
810
+ custom_json = json.dumps(data, indent=4)
811
+ # Save JSON data to the specified file
812
+ json_file = f"{file_name}.json"
813
+ with open(json_file, 'w') as file:
814
+ file.write(custom_json)
815
+ print(f"\n'{json_file}' file is generated successfully under the current working directory.")
816
+
817
+
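A sketch of the round trip from generate_custom_config() to a custom run, based on the docstring above and the regression scenario in the class docstring; the file name is arbitrary:

# Writes "housing_custom.json" in the current working directory.
AutoML.generate_custom_config("housing_custom")

# Feeding the generated file back in switches the run from auto mode to
# custom mode (custom_config_file sets auto=False internally).
aml_custom = AutoML(task_type="Regression",
                    custom_config_file="housing_custom.json")
aml_custom.fit(housing_train, "price")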
818
+ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):
819
+
820
+ def __init__(self,
821
+ data,
822
+ target_column,
823
+ custom_data = None):
824
+ """
825
+ DESCRIPTION:
826
+ Function initializes the data, target column for Regression.
827
+
828
+ PARAMETERS:
829
+ data:
830
+ Required Argument.
831
+ Specifies the input teradataml Dataframe.
832
+ Types: teradataml Dataframe
833
+
834
+ target_column:
835
+ Required Argument.
836
+ Specifies the name of the target column in "data".
837
+ Types: str
838
+
839
+ custom_data:
840
+ Optional Argument.
841
+ Specifies json object containing user customized input.
842
+ Types: json object
843
+ """
844
+ self.data = data
845
+ self.target_column = target_column
846
+ self.custom_data = custom_data
847
+
848
+
849
+ def _regression(self,
850
+ model_list = None,
851
+ auto = False,
852
+ verbose = 0,
853
+ max_runtime_secs = None,
854
+ stopping_metric = None,
855
+ stopping_tolerance = None,
856
+ max_models = None):
857
+ """
858
+ DESCRIPTION:
859
+            Internal function that runs Regression.
860
+
861
+ PARAMETERS:
862
+ auto:
863
+ Optional Argument.
864
+ Specifies whether to run AutoML in custom mode or auto mode.
865
+ When set to False, runs in custom mode; otherwise, runs in auto mode.
866
+ Types: bool
867
+
868
+ verbose:
869
+ Optional Argument.
870
+ Specifies the detailed execution steps based on verbose level.
871
+ Default Value: 0
872
+ Permitted Values:
873
+ * 0: prints the progress bar and leaderboard
874
+ * 1: prints the execution steps of AutoML.
875
+ * 2: prints the intermediate data between the execution of each step of AutoML.
876
+ Types: int
877
+
878
+ max_runtime_secs:
879
+ Optional Argument.
880
+ Specifies the time limit in seconds for model training.
881
+ Types: int
882
+
883
+ stopping_metric:
884
+ Required, when "stopping_tolerance" is set, otherwise optional.
885
+ Specifies the stopping metric for stopping tolerance in model training.
886
+ Types: str
887
+
888
+ stopping_tolerance:
889
+ Required, when "stopping_metric" is set, otherwise optional.
890
+ Specifies the stopping tolerance for stopping metrics in model training.
891
+ Types: float
892
+
893
+ max_models:
894
+ Optional Argument.
895
+ Specifies the maximum number of models to be trained.
896
+ Types: int
897
+
898
+ RETURNS:
899
+ A tuple containing model information, leaderboard, target count, target label, data transformation parameters, and table name mapping.
900
+ """
901
+ # Feature Exploration Phase
902
+ _FeatureExplore.__init__(self,
903
+ data = self.data,
904
+ target_column = self.target_column,
905
+ verbose=verbose)
906
+ if verbose > 0:
907
+ self._exploration()
908
+ # Feature Engineering Phase
909
+ _FeatureEngineering.__init__(self,
910
+ data = self.data,
911
+ target_column = self.target_column,
912
+ model_list = model_list,
913
+ verbose = verbose,
914
+ custom_data = self.custom_data)
915
+ # Start time
916
+ start_time = time.time()
917
+ data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
918
+
919
+ # Data preparation Phase
920
+ _DataPreparation.__init__(self,
921
+ data = self.data,
922
+ target_column = self.target_column,
923
+ verbose = verbose,
924
+ excluded_columns = excluded_columns,
925
+ custom_data = self.custom_data,
926
+ data_transform_dict = data_transformation_params)
927
+ features, data_transformation_params = self.data_preparation(auto)
928
+
929
+ # Calculating max_runtime_secs for model training by,
930
+ # subtracting the time taken for feature engineering and data preparation
931
+ max_runtime_secs = max_runtime_secs - (time.time() - start_time) \
932
+ if max_runtime_secs is not None else None
933
+
934
+ # Setting max_runtime_secs to 60 seconds if it is less than 0
935
+ max_runtime_secs = 60 if max_runtime_secs is not None and \
936
+ max_runtime_secs < 0 else max_runtime_secs
937
+
938
+ # Model Training
939
+ _ModelTraining.__init__(self,
940
+ data = self.data,
941
+ target_column = self.target_column,
942
+ model_list = model_list,
943
+ verbose = verbose,
944
+ features = features,
945
+ task_type = "Regression",
946
+ custom_data = self.custom_data)
947
+ models_info, leaderboard, target_count = self.model_training(auto = auto,
948
+ max_runtime_secs = max_runtime_secs,
949
+ stopping_metric = stopping_metric,
950
+ stopping_tolerance = stopping_tolerance,
951
+ max_models = max_models)
952
+
953
+ return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
954
+
955
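# A minimal sketch (hypothetical helper, not part of this module) of the runtime-budget
# arithmetic used in _regression() above (and mirrored in _classification() below): time
# already spent on feature engineering and data preparation is deducted from
# "max_runtime_secs", and a negative remainder is clamped to a 60-second floor.
def _remaining_training_budget(max_runtime_secs, elapsed_secs):
    """Return the seconds left for model training, or None when no limit was set."""
    if max_runtime_secs is None:
        return None
    remaining = max_runtime_secs - elapsed_secs
    return 60 if remaining < 0 else remaining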
+ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):
956
+
957
+ def __init__(self,
958
+ data,
959
+ target_column,
960
+ custom_data = None):
961
+ """
962
+ DESCRIPTION:
963
+ Function initializes the data, target column for Classification.
964
+
965
+ PARAMETERS:
966
+ data:
967
+ Required Argument.
968
+ Specifies the input teradataml DataFrame.
969
+ Types: teradataml DataFrame
970
+
971
+ target_column:
972
+ Required Argument.
973
+ Specifies the name of the target column in "data".
974
+ Types: str
975
+
976
+ custom_data:
977
+ Optional Argument.
978
+ Specifies the JSON object containing user-customized input.
979
+ Types: json object
980
+ """
981
+ self.data = data
982
+ self.target_column = target_column
983
+ self.custom_data = custom_data
984
+
985
+ def _classification(self,
986
+ model_list = None,
987
+ auto = False,
988
+ verbose = 0,
989
+ max_runtime_secs = None,
990
+ stopping_metric = None,
991
+ stopping_tolerance = None,
992
+ max_models = None):
993
+ """
994
+ DESCRIPTION:
995
+ Internal function that runs Classification.
996
+
997
+ PARAMETERS:
998
+ auto:
999
+ Optional Argument.
1000
+ Specifies whether to run AutoML in custom mode or auto mode.
1001
+ When set to False, runs in custom mode; otherwise, runs in auto mode.
1002
+ Types: bool
1003
+
1004
+ verbose:
1005
+ Optional Argument.
1006
+ Specifies the detailed execution steps based on verbose level.
1007
+ Default Value: 0
1008
+ Permitted Values:
1009
+ * 0: prints the progress bar and leaderboard
1010
+ * 1: prints the execution steps of AutoML.
1011
+ * 2: prints the intermediate data between the execution of each step of AutoML.
1012
+ Types: int
1013
+
1014
+ max_runtime_secs:
1015
+ Optional Argument.
1016
+ Specifies the time limit in seconds for model training.
1017
+ Types: int
1018
+
1019
+ stopping_metric:
1020
+ Required, when "stopping_tolerance" is set, otherwise optional.
1021
+ Specifies the stopping metric for stopping tolerance in model training.
1022
+ Types: str
1023
+
1024
+ stopping_tolerance:
1025
+ Required, when "stopping_metric" is set, otherwise optional.
1026
+ Specifies the stopping tolerance for stopping metrics in model training.
1027
+ Types: float
1028
+
1029
+ max_models:
1030
+ Optional Argument.
1031
+ Specifies the maximum number of models to be trained.
1032
+ Types: int
1033
+
1034
+ RETURNS:
1035
+ A tuple containing model information, leaderboard, target count, target label, data transformation parameters, and table name mapping.
1036
+ """
1037
+
1038
+
1039
+ # Feature Exploration Phase
1040
+ _FeatureExplore.__init__(self,
1041
+ data = self.data,
1042
+ target_column = self.target_column,
1043
+ verbose=verbose)
1044
+ if verbose > 0:
1045
+ self._exploration()
1046
+ # Feature Engineering Phase
1047
+ _FeatureEngineering.__init__(self,
1048
+ data = self.data,
1049
+ target_column = self.target_column,
1050
+ model_list = model_list,
1051
+ verbose = verbose,
1052
+ task_type = "Classification",
1053
+ custom_data = self.custom_data)
1054
+ # Start time
1055
+ start_time = time.time()
1056
+ data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
1057
+ # Data Preparation Phase
1058
+ _DataPreparation.__init__(self,
1059
+ data = self.data,
1060
+ target_column = self.target_column,
1061
+ verbose = verbose,
1062
+ excluded_columns = excluded_columns,
1063
+ custom_data = self.custom_data,
1064
+ data_transform_dict = data_transformation_params,
1065
+ task_type = "Classification")
1066
+ features, data_transformation_params = self.data_preparation(auto)
1067
+
1068
+ # Calculating max_runtime_secs for model training by,
1069
+ # subtracting the time taken for feature engineering and data preparation
1070
+ max_runtime_secs = max_runtime_secs - (time.time() - start_time) \
1071
+ if max_runtime_secs is not None else None
1072
+
1073
+ # Setting max_runtime_secs to 60 seconds if it is less than 0
1074
+ max_runtime_secs = 60 if max_runtime_secs is not None and \
1075
+ max_runtime_secs < 0 else max_runtime_secs
1076
+
1077
+ # Model training
1078
+ _ModelTraining.__init__(self,
1079
+ data = self.data,
1080
+ target_column = self.target_column,
1081
+ model_list = model_list,
1082
+ verbose = verbose,
1083
+ features = features,
1084
+ task_type = "Classification",
1085
+ custom_data = self.custom_data)
1086
+ models_info, leaderboard, target_count = self.model_training(auto = auto,
1087
+ max_runtime_secs = max_runtime_secs,
1088
+ stopping_metric = stopping_metric,
1089
+ stopping_tolerance = stopping_tolerance,
1090
+ max_models = max_models)
1091
+
1092
+ return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
1093
+
1094
+ def _target_column_details(self):
1095
+ """
1096
+ DESCRIPTION:
1097
+ Internal function that displays the distribution of the target/response column.
1098
+ """
1099
+ # If data visualization libraries are available
1100
+ if self._check_visualization_libraries() and not _is_terminal():
1101
+ import matplotlib.pyplot as plt
1102
+ import seaborn as sns
1103
+ self._display_msg(msg='\nTarget Column Distribution:',
1104
+ show_data=True)
1105
+ plt.figure(figsize=(6, 6))
1106
+ # Plotting the class distribution of the target column
1107
+ sns.countplot(data=self.data.select([self.target_column]).to_pandas(), x=self.target_column)
1108
+ plt.show()
1109
+
1110
+ def _check_data_imbalance(self,
1111
+ data=None):
1112
+ """
1113
+ DESCRIPTION:
1114
+ Internal function that calculates the class distribution and checks for imbalance in the dataset.
1115
+
1116
+ PARAMETERS:
1117
+ data:
1118
+ Required Argument.
1119
+ Specifies the input teradataml DataFrame.
1120
+ Types: teradataml DataFrame
1121
+
1122
+ RETURNS:
1123
+ bool, True if an imbalanced dataset is detected, otherwise False.
1124
+ """
1125
+ self._display_msg(msg="\nChecking imbalance data ...",
1126
+ progress_bar=self.progress_bar)
1127
+ # Calculate the distribution of classes in the target column
1128
+ class_dist = data[self.target_column].value_counts().values
1129
+
1130
+ # Find the minimum count of data points among the classes
1131
+ min_ct = np.min(class_dist)
1132
+
1133
+ # Find the maximum count of data points among the classes
1134
+ max_ct = np.max(class_dist)
1135
+
1136
+ # Calculate the imbalance ratio(minimum count to maximum count)
1137
+ imb_ratio = min_ct / max_ct
1138
+
1139
+ # Check if the imbalance ratio is less than the threshold of 0.4
1140
+ if imb_ratio < 0.4:
1141
+ self._display_msg(msg="Imbalance Found.",
1142
+ progress_bar=self.progress_bar)
1143
+ return True
1144
+
1145
+ self._display_msg(msg="Imbalance Not Found.",
1146
+ progress_bar=self.progress_bar)
1147
+ return False
1148
+
1149
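# A minimal standalone sketch (hypothetical, pandas/numpy only) of the imbalance test above:
# the dataset is flagged as imbalanced when the smallest class is under 40% of the size of
# the largest class.
import numpy as np

def _is_imbalanced(target_series, threshold=0.4):
    """Return True when min(class count) / max(class count) of a pandas Series falls below the threshold."""
    counts = target_series.value_counts().values
    return (np.min(counts) / np.max(counts)) < threshold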
+ def _set_custom_sampling(self):
1150
+ """
1151
+ DESCRIPTION:
1152
+ Function to handle customized data sampling for imbalance dataset.
1153
+ """
1154
+ # Fetching user input for data sampling
1155
+ data_imbalance_input = self.custom_data.get("DataImbalanceIndicator", False)
1156
+ if data_imbalance_input:
1157
+ # Extracting method for performing data sampling
1158
+ handling_method = self.custom_data.get("DataImbalanceMethod", None)
1159
+ if handling_method == 'SMOTE':
1160
+ self._data_sampling_method = "SMOTE"
1161
+ elif handling_method == 'NearMiss':
1162
+ self._data_sampling_method = "NearMiss"
1163
+ else:
1164
+ self._display_msg(inline_msg="Provided method for data imbalance is not supported. AutoML will proceed with the default option.",
1165
+ progress_bar=self.progress_bar)
1166
+ else:
1167
+ self._display_msg(inline_msg="No information provided for performing customized imbalanced dataset sampling. AutoML will proceed with the default option.",
1168
+ progress_bar=self.progress_bar)
1169
+
1170
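# A minimal sketch (assumed shape, key names taken from the lookups above) of the custom JSON
# entries consumed by _set_custom_sampling(); only "SMOTE" and "NearMiss" are recognized, and
# anything else falls back to the default sampling method.
#
#     custom_data = {
#         "DataImbalanceIndicator": True,
#         "DataImbalanceMethod": "SMOTE"    # or "NearMiss"
#     }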
+ def _data_sampling(self,
1171
+ data):
1172
+ """
1173
+ DESCRIPTION:
1174
+ Function to handle data imbalance in dataset using sampling techniques
1175
+ in case of classification.
1176
+
1177
+ PARAMETERS:
1178
+ data:
1179
+ Required Argument.
1180
+ Specifies the input pandas DataFrame.
1181
+ Types: pandas DataFrame.
1182
+
1183
+ RETURNS:
1184
+ pandas DataFrame after handling data imbalance.
1185
+ """
1186
+ self._display_msg(msg="\nStarting data imbalance handling ...",
1187
+ progress_bar=self.progress_bar,
1188
+ show_data=True)
1189
+
1190
+ # Importing required libraries
1191
+ from imblearn.over_sampling import SMOTE
1192
+ from imblearn.under_sampling import NearMiss
1193
+
1194
+ st = time.time()
1195
+ self._display_msg(msg=f"\nBalancing the data using {self._data_sampling_method}...",
1196
+ progress_bar=self.progress_bar,
1197
+ show_data=True)
1198
+ # Performing data sampling
1199
+ try:
1200
+ # Fetching the minimum target column label count and
1201
+ # accordingly setting the number of neighbors for the sampler
1202
+ min_label_count = min(data[self.target_column].value_counts())
1203
+ if self._data_sampling_method == 'SMOTE':
1204
+ n_neighbors = min(5, min_label_count - 1)
1205
+ sampling_method = SMOTE(k_neighbors=n_neighbors, random_state=42)
1206
+ else:
1207
+ n_neighbors = min(3, min_label_count)
1208
+ sampling_method = NearMiss(version=1, n_neighbors=n_neighbors)
1209
+
1210
+ # Fitting on dataset
1211
+ xt, yt = sampling_method.fit_resample(data.drop(columns=[self.target_column], axis=1),
1212
+ data[self.target_column])
1213
+
1214
+ # Merging the balanced dataset with target column
1215
+ balanced_df = (xt.reset_index().merge(yt.reset_index(), on="index"))
1216
+ balanced_df.drop(columns=['index', 'id'], axis=1, inplace=True)
1217
+ balanced_df = balanced_df.reset_index().rename(columns={'index': 'id'})
1218
+
1219
+ et = time.time()
1220
+ self._display_msg(msg=f"Handled imbalanced dataset using {self._data_sampling_method}: {et - st:.2f} sec",
1221
+ progress_bar=self.progress_bar,
1222
+ show_data=True)
1223
+ except:
1224
+ self._display_msg(msg=f"Balancing using {self._data_sampling_method} Failed!!",
1225
+ progress_bar=self.progress_bar,
1226
+ show_data=True)
1227
+ # Returning original data if the data sampler fails
1228
+ return data
1229
+
1230
+ self._display_msg(msg="Completed data imbalance handling.",
1231
+ progress_bar=self.progress_bar,
1232
+ show_data=True)
1233
+ # Returning balanced dataframe
1234
+ return balanced_df
1235
+
1236
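# A minimal pandas/imbalanced-learn sketch (hypothetical helper, not the module's API) of the
# resampling performed in _data_sampling() above: the neighbour count is clamped to the size
# of the rarest class before fitting SMOTE (oversampling) or NearMiss (undersampling).
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss

def _resample(df, target_column, method="SMOTE"):
    """Return a class-balanced copy of "df" using the requested sampling method."""
    min_label_count = df[target_column].value_counts().min()
    if method == "SMOTE":
        sampler = SMOTE(k_neighbors=min(5, min_label_count - 1), random_state=42)
    else:
        sampler = NearMiss(version=1, n_neighbors=min(3, min_label_count))
    xt, yt = sampler.fit_resample(df.drop(columns=[target_column]), df[target_column])
    return pd.concat([xt, yt], axis=1)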
+ class AutoRegressor(AutoML):
1237
+
1238
+ def __init__(self,
1239
+ include = None,
1240
+ exclude = None,
1241
+ verbose=0,
1242
+ max_runtime_secs=None,
1243
+ stopping_metric=None,
1244
+ stopping_tolerance=None,
1245
+ max_models=None,
1246
+ custom_config_file=None
1247
+ ):
1248
+ """
1249
+ DESCRIPTION:
1250
+ AutoRegressor is a special purpose AutoML feature to run regression specific tasks.
1251
+
1252
+ PARAMETERS:
1253
+ include:
1254
+ Optional Argument.
1255
+ Specifies the model algorithms to be used for model training phase.
1256
+ By default, all 5 models are used for training for regression and binary
1257
+ classification problem, while only 3 models are used for multi-class.
1258
+ Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
1259
+ Types: str OR list of str
1260
+
1261
+ exclude:
1262
+ Optional Argument.
1263
+ Specifies the model algorithms to be excluded from model training phase.
1264
+ No model is excluded by default.
1265
+ Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
1266
+ Types: str OR list of str
1267
+
1268
+ verbose:
1269
+ Optional Argument.
1270
+ Specifies the detailed execution steps based on verbose level.
1271
+ Default Value: 0
1272
+ Permitted Values:
1273
+ * 0: prints the progress bar and leaderboard
1274
+ * 1: prints the execution steps of AutoML.
1275
+ * 2: prints the intermediate data between the execution of each step of AutoML.
1276
+ Types: int
1277
+
1278
+ max_runtime_secs:
1279
+ Optional Argument.
1280
+ Specifies the time limit in seconds for model training.
1281
+ Types: int
1282
+
1283
+ stopping_metric:
1284
+ Required, when "stopping_tolerance" is set, otherwise optional.
1285
+ Specifies the stopping metric for stopping tolerance in model training.
1286
+ Permitted Values:
1287
+ * For task_type "Regression": "R2", "MAE", "MSE", "MSLE",
1288
+ "RMSE", "RMSLE"
1289
+ * For task_type "Classification": 'MICRO-F1','MACRO-F1',
1290
+ 'MICRO-RECALL','MACRO-RECALL',
1291
+ 'MICRO-PRECISION', 'MACRO-PRECISION',
1292
+ 'WEIGHTED-PRECISION','WEIGHTED-RECALL',
1293
+ 'WEIGHTED-F1', 'ACCURACY'
1294
+ Types: str
1295
+
1296
+ stopping_tolerance:
1297
+ Required, when "stopping_metric" is set, otherwise optional.
1298
+ Specifies the stopping tolerance for stopping metrics in model training.
1299
+ Types: float
1300
+
1301
+ max_models:
1302
+ Optional Argument.
1303
+ Specifies the maximum number of models to be trained.
1304
+ Types: int
1305
+
1306
+ custom_config_file:
1307
+ Optional Argument.
1308
+ Specifies the path of JSON file in case of custom run.
1309
+ Types: str
1310
+
1311
+ RETURNS:
1312
+ Instance of AutoRegressor.
1313
+
1314
+ RAISES:
1315
+ TeradataMlException, TypeError, ValueError
1316
+
1317
+ EXAMPLES:
1318
+ # Notes:
1319
+ # 1. Get the connection to Vantage to execute the function.
1320
+ # 2. One must import the required functions mentioned in
1321
+ # the example from teradataml.
1322
+ # 3. Function will raise an error if not supported on the Vantage
1323
+ # the user is connected to.
1324
+
1325
+ # Load the example data.
1326
+ >>> load_example_data("decisionforestpredict", ["housing_train", "housing_test"])
1327
+
1328
+ # Create teradataml DataFrame object.
1329
+ >>> housing_train = DataFrame.from_table("housing_train")
1330
+
1331
+ # Example 1 : Run AutoRegressor using default options.
1332
+ # Scenario : Predict the price of house based on different factors.
1333
+
1334
+ # Create instance of AutoRegressor.
1335
+ >>> automl_obj = AutoRegressor()
1336
+
1337
+ # Fit the data.
1338
+ >>> automl_obj.fit(housing_train, "price")
1339
+
1340
+ # Predict using best performing model.
1341
+ >>> prediction = automl_obj.predict()
1342
+ >>> prediction
1343
+
1344
+ # Run predict for new test data with best performing model.
1345
+ >>> prediction = automl_obj.predict(housing_test)
1346
+ >>> prediction
1347
+
1348
+ # Run predict for new test data with second best performing model.
1349
+ >>> prediction = automl_obj.predict(housing_test, rank=2)
1350
+ >>> prediction
1351
+
1352
+ # Display leaderboard.
1353
+ >>> automl_obj.leaderboard()
1354
+
1355
+ # Display best performing model.
1356
+ >>> automl_obj.leader()
1357
+
1358
+ # Example 2 : Run AutoRegressor for regression problem with early stopping metric and tolerance.
1359
+ # Scenario : Predict the price of house based on different factors.
1360
+ # Use custom configuration file to customize different
1361
+ # processes of AutoML Run. Define performance threshold
1362
+ # to acquire for the available models, and terminate training
1363
+ # upon meeting the stipulated performance criteria.
1364
+
1365
+ # Generate custom configuration file.
1366
+ >>> AutoRegressor.generate_custom_config("custom_housing")
1367
+
1368
+ # Create instance of AutoRegressor.
1369
+ >>> automl_obj = AutoRegressor(verbose=2,
1370
+ >>> exclude="xgboost",
1371
+ >>> stopping_metric="R2",
1372
+ >>> stopping_tolerance=0.7,
1373
+ >>> max_models=10,
1374
+ >>> custom_config_file="custom_housing.json")
1375
+ # Fit the data.
1376
+ >>> automl_obj.fit(housing_train, "price")
1377
+
1378
+ # Run predict with best performing model.
1379
+ >>> prediction = automl_obj.predict()
1380
+ >>> prediction
1381
+
1382
+ # Display leaderboard.
1383
+ >>> automl_obj.leaderboard()
1384
+
1385
+ # Example 3 : Run AutoRegressor for regression problem with maximum runtime.
1386
+ # Scenario : Predict the price of house based on different factors.
1387
+ # Run AutoML to get the best performing model in specified time.
1388
+
1389
+ # Create instance of AutoRegressor.
1390
+ >>> automl_obj = AutoRegressor(verbose=2,
1391
+ >>> exclude="xgboost",
1392
+ >>> max_runtime_secs=500)
1393
+ # Fit the data.
1394
+ >>> automl_obj.fit(housing_train, "price")
1395
+
1396
+ # Run predict with best performing model.
1397
+ >>> prediction = automl_obj.predict()
1398
+ >>> prediction
1399
+
1400
+ # Run predict with second best performing model.
1401
+ >>> prediction = automl_obj.predict(rank=2)
1402
+ >>> prediction
1403
+
1404
+ # Display leaderboard.
1405
+ >>> automl_obj.leaderboard()
1406
+
1407
+ # Display best performing model.
1408
+ >>> automl_obj.leader()
1409
+ """
1410
+ self.verbose = verbose
1411
+ self.max_runtime_secs = max_runtime_secs
1412
+ self.stopping_metric = stopping_metric
1413
+ self.stopping_tolerance = stopping_tolerance
1414
+ self.max_models = max_models
1415
+ self.custom_config_file = custom_config_file
1416
+ self.task_type = "Regression"
1417
+ self.include = include
1418
+ self.exclude = exclude
1419
+
1420
+ super(AutoRegressor, self).__init__(task_type=self.task_type,
1421
+ include = self.include,
1422
+ exclude = self.exclude,
1423
+ verbose=self.verbose,
1424
+ max_runtime_secs=self.max_runtime_secs,
1425
+ stopping_metric=self.stopping_metric,
1426
+ stopping_tolerance=self.stopping_tolerance,
1427
+ max_models=self.max_models,
1428
+ custom_config_file=self.custom_config_file)
1429
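# A minimal sketch (assumed equivalence based on the constructor above) of what AutoRegressor
# adds over AutoML: it only pins task_type to "Regression" and forwards every other argument.
#
#     >>> AutoRegressor(max_models=5)
#     # behaves like
#     >>> AutoML(task_type="Regression", max_models=5)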
+ class AutoClassifier(AutoML):
1430
+
1431
+ def __init__(self,
1432
+ include = None,
1433
+ exclude = None,
1434
+ verbose=0,
1435
+ max_runtime_secs=None,
1436
+ stopping_metric=None,
1437
+ stopping_tolerance=None,
1438
+ max_models=None,
1439
+ custom_config_file=None
1440
+ ):
1441
+ """
1442
+ DESCRIPTION:
1443
+ AutoClassifier is a special purpose AutoML feature to run classification specific tasks.
1444
+
1445
+ PARAMETERS:
1446
+ include:
1447
+ Optional Argument.
1448
+ Specifies the model algorithms to be used for model training phase.
1449
+ By default, all 5 models are used for training for regression and binary
1450
+ classification problem, while only 3 models are used for multi-class.
1451
+ Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
1452
+ Types: str OR list of str
1453
+
1454
+ exclude:
1455
+ Optional Argument.
1456
+ Specifies the model algorithms to be excluded from model training phase.
1457
+ No model is excluded by default.
1458
+ Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
1459
+ Types: str OR list of str
1460
+
1461
+ verbose:
1462
+ Optional Argument.
1463
+ Specifies the detailed execution steps based on verbose level.
1464
+ Default Value: 0
1465
+ Permitted Values:
1466
+ * 0: prints the progress bar and leaderboard
1467
+ * 1: prints the execution steps of AutoML.
1468
+ * 2: prints the intermediate data between the execution of each step of AutoML.
1469
+ Types: int
1470
+
1471
+ max_runtime_secs:
1472
+ Optional Argument.
1473
+ Specifies the time limit in seconds for model training.
1474
+ Types: int
1475
+
1476
+ stopping_metric:
1477
+ Required, when "stopping_tolerance" is set, otherwise optional.
1478
+ Specifies the stopping metric for stopping tolerance in model training.
1479
+ Permitted Values:
1480
+ * For task_type "Regression": "R2", "MAE", "MSE", "MSLE",
1481
+ "RMSE", "RMSLE"
1482
+ * For task_type "Classification": 'MICRO-F1','MACRO-F1',
1483
+ 'MICRO-RECALL','MACRO-RECALL',
1484
+ 'MICRO-PRECISION', 'MACRO-PRECISION',
1485
+ 'WEIGHTED-PRECISION','WEIGHTED-RECALL',
1486
+ 'WEIGHTED-F1', 'ACCURACY'
1487
+ Types: str
1488
+
1489
+ stopping_tolerance:
1490
+ Required, when "stopping_metric" is set, otherwise optional.
1491
+ Specifies the stopping tolerance for stopping metrics in model training.
1492
+ Types: float
1493
+
1494
+ max_models:
1495
+ Optional Argument.
1496
+ Specifies the maximum number of models to be trained.
1497
+ Types: int
1498
+
1499
+ custom_config_file:
1500
+ Optional Argument.
1501
+ Specifies the path of JSON file in case of custom run.
1502
+ Types: str
1503
+
1504
+ RETURNS:
1505
+ Instance of AutoClassifier.
1506
+
1507
+ RAISES:
1508
+ TeradataMlException, TypeError, ValueError
1509
+
1510
+ EXAMPLES:
1511
+ # Notes:
1512
+ # 1. Get the connection to Vantage to execute the function.
1513
+ # 2. One must import the required functions mentioned in
1514
+ # the example from teradataml.
1515
+ # 3. Function will raise an error if not supported on the Vantage
1516
+ # the user is connected to.
1517
+
1518
+ # Load the example data.
1519
+ >>> load_example_data("teradataml", ["titanic", "iris_input"])
1520
+ >>> load_example_data("GLMPredict", ["admissions_test", "admissions_train"])
1521
+
1522
+ # Create teradataml DataFrame object.
1523
+ >>> admissions_train = DataFrame.from_table("admissions_train")
1524
+ >>> titanic = DataFrame.from_table("titanic")
1525
+ >>> iris_input = DataFrame.from_table("iris_input")
1526
+ >>> admissions_test = DataFrame.from_table("admissions_test")
1527
+
1528
+ # Example 1 : Run AutoClassifier for binary classification problem
1529
+ # Scenario : Predict whether a student will be admitted to a university
1530
+ # based on different factors. Run AutoML to get the best performing model
1531
+ # out of available models.
1532
+
1533
+ # Create instance of AutoClassifier.
1534
+ >>> automl_obj = AutoClassifier()
1535
+
1536
+ # Fit the data.
1537
+ >>> automl_obj.fit(admissions_train, "admitted")
1538
+
1539
+ # Predict using best performing model.
1540
+ >>> prediction = automl_obj.predict()
1541
+ >>> prediction
1542
+
1543
+ # Run predict for new test data with best performing model.
1544
+ >>> prediction = automl_obj.predict(admissions_test)
1545
+ >>> prediction
1546
+
1547
+ # Run predict for new test data with second best performing model.
1548
+ >>> prediction = automl_obj.predict(admissions_test, rank=2)
1549
+ >>> prediction
1550
+
1551
+ # Display leaderboard.
1552
+ >>> automl_obj.leaderboard()
1553
+
1554
+ # Display best performing model.
1555
+ >>> automl_obj.leader()
1556
+
1557
+ # Example 2 : Run AutoClassifier for binary classification.
1558
+ # or not based on different factors. Run AutoML to get the
1559
+ # or not based on differect factors. Run AutoML to get the
1560
+ # best performing model out of available models. Use custom
1561
+ # configuration file to customize different processes of
1562
+ # AutoML Run.
1563
+
1564
+ # Generate custom configuration file.
1565
+ >>> AutoClassifier.generate_custom_config("custom_titanic")
1566
+
1567
+ # Create instance of AutoClassifier.
1568
+ >>> automl_obj = AutoClassifier(verbose=2,
1569
+ >>> custom_config_file="custom_titanic.json")
1570
+ # Fit the data.
1571
+ >>> automl_obj.fit(titanic, titanic.survived)
1572
+
1573
+ # Run predict with best performing model.
1574
+ >>> prediction = automl_obj.predict()
1575
+ >>> prediction
1576
+
1577
+ # Run predict with second best performing model.
1578
+ >>> prediction = automl_obj.predict(rank=2)
1579
+ >>> prediction
1580
+
1581
+ # Display leaderboard.
1582
+ >>> automl_obj.leaderboard()
1583
+
1584
+ # Display best performing model.
1585
+ >>> automl_obj.leader()
1586
+
1587
+ # Example 3 : Run AutoClassifier for multiclass classification problem.
1588
+ # Scenario : Predict the species of iris flower based on different factors.
1589
+ # Run AutoML to get the best performing model out of available
1590
+ # models. Use custom configuration file to customize different
1591
+ # processes of AutoML Run.
1592
+
1593
+ # Generate custom configuration file.
1594
+ >>> AutoClassifier.generate_custom_config("custom_iris")
1595
+
1596
+ # Create instance of AutoClassifier.
1597
+ >>> automl_obj = AutoClassifier(verbose=1,
1598
+ >>> custom_config_file="custom_iris.json")
1599
+ # Fit the data.
1600
+ >>> automl_obj.fit(iris_input, "species")
1601
+
1602
+ # Predict using best performing model.
1603
+ >>> prediction = automl_obj.predict()
1604
+ >>> prediction
1605
+
1606
+ # Display leaderboard.
1607
+ >>> automl_obj.leaderboard()
1608
+
1609
+ # Display best performing model.
1610
+ >>> automl_obj.leader()
1611
+
1612
+ # Example 4 : Run AutoClassifier for classification problem with stopping metric and tolerance.
1613
+ # Scenario : Predict whether passenger aboard the RMS Titanic survived
1614
+ # or not based on different factors. Use custom configuration
1615
+ # file to customize different processes of AutoML Run. Define
1616
+ # performance threshold to acquire for the available models, and
1617
+ # terminate training upon meeting the stipulated performance criteria.
1618
+
1619
+ # Generate custom configuration file.
1620
+ >>> AutoClassifier.generate_custom_config("custom_titanic")
1621
+
1622
+ # Create instance of AutoClassifier.
1623
+ >>> automl_obj = AutoClassifier(verbose=2,
1624
+ >>> exclude="xgboost",
1625
+ >>> stopping_metric="MICRO-F1",
1626
+ >>> stopping_tolerance=0.7,
1627
+ >>> max_models=8,
1628
+ >>> custom_config_file="custom_titanic.json")
1629
+ # Fit the data.
1630
+ >>> automl_obj.fit(titanic, titanic.survived)
1631
+
1632
+ # Run predict with best performing model.
1633
+ >>> prediction = automl_obj.predict()
1634
+ >>> prediction
1635
+
1636
+ # Display leaderboard.
1637
+ >>> automl_obj.leaderboard()
1638
+
1639
+ # Example 5 : Run AutoClassifier for classification problem with maximum runtime.
1640
+ # Scenario : Predict the species of iris flower based on different factors.
1641
+ # Run AutoML to get the best performing model in specified time.
1642
+
1643
+ # Create instance of AutoClassifier.
1644
+ >>> automl_obj = AutoClassifier(verbose=2,
1645
+ >>> exclude="xgboost",
1646
+ >>> max_runtime_secs=500,
1647
+ >>> max_models=3)
1648
+ # Fit the data.
1649
+ >>> automl_obj.fit(iris_input, iris_input.species)
1650
+
1651
+ # Run predict with best performing model.
1652
+ >>> prediction = automl_obj.predict()
1653
+ >>> prediction
1654
+
1655
+ # Run predict with second best performing model.
1656
+ >>> prediction = automl_obj.predict(rank=2)
1657
+ >>> prediction
1658
+
1659
+ # Display leaderboard.
1660
+ >>> automl_obj.leaderboard()
1661
+
1662
+ # Display best performing model.
1663
+ >>> automl_obj.leader()
1664
+ """
1665
+ self.verbose = verbose
1666
+ self.max_runtime_secs = max_runtime_secs
1667
+ self.stopping_metric = stopping_metric
1668
+ self.stopping_tolerance = stopping_tolerance
1669
+ self.max_models = max_models
1670
+ self.custom_config_file = custom_config_file
1671
+ self.task_type = "Classification"
1672
+ self.include = include
1673
+ self.exclude = exclude
1674
+
1675
+ super(AutoClassifier, self).__init__(task_type=self.task_type,
1676
+ include = self.include,
1677
+ exclude = self.exclude,
1678
+ verbose=self.verbose,
1679
+ max_runtime_secs=self.max_runtime_secs,
1680
+ stopping_metric=self.stopping_metric,
1681
+ stopping_tolerance=self.stopping_tolerance,
1682
+ max_models=self.max_models,
1683
+ custom_config_file=self.custom_config_file)