teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (1303)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1935 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2040 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +798 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1683 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +1011 -0
  31. teradataml/automl/data_transformation.py +789 -0
  32. teradataml/automl/feature_engineering.py +1580 -0
  33. teradataml/automl/feature_exploration.py +554 -0
  34. teradataml/automl/model_evaluation.py +151 -0
  35. teradataml/automl/model_training.py +1026 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/auth_client.py +133 -0
  41. teradataml/clients/pkce_client.py +481 -481
  42. teradataml/common/aed_utils.py +7 -2
  43. teradataml/common/bulk_exposed_utils.py +111 -111
  44. teradataml/common/constants.py +1438 -1441
  45. teradataml/common/deprecations.py +160 -0
  46. teradataml/common/exceptions.py +73 -73
  47. teradataml/common/formula.py +742 -742
  48. teradataml/common/garbagecollector.py +597 -635
  49. teradataml/common/messagecodes.py +424 -431
  50. teradataml/common/messages.py +228 -231
  51. teradataml/common/sqlbundle.py +693 -693
  52. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  53. teradataml/common/utils.py +2424 -2500
  54. teradataml/common/warnings.py +25 -25
  55. teradataml/common/wrapper_utils.py +1 -110
  56. teradataml/config/dummy_file1.cfg +4 -4
  57. teradataml/config/dummy_file2.cfg +2 -2
  58. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  59. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  60. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  61. teradataml/context/aed_context.py +217 -217
  62. teradataml/context/context.py +1091 -999
  63. teradataml/data/A_loan.csv +19 -19
  64. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  65. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  66. teradataml/data/B_loan.csv +49 -49
  67. teradataml/data/BuoyData2.csv +17 -17
  68. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  69. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  70. teradataml/data/Convolve2RealsLeft.csv +5 -5
  71. teradataml/data/Convolve2RealsRight.csv +5 -5
  72. teradataml/data/Convolve2ValidLeft.csv +11 -11
  73. teradataml/data/Convolve2ValidRight.csv +11 -11
  74. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  75. teradataml/data/Orders1_12mf.csv +24 -24
  76. teradataml/data/Pi_loan.csv +7 -7
  77. teradataml/data/SMOOTHED_DATA.csv +7 -7
  78. teradataml/data/TestDFFT8.csv +9 -9
  79. teradataml/data/TestRiver.csv +109 -109
  80. teradataml/data/Traindata.csv +28 -28
  81. teradataml/data/acf.csv +17 -17
  82. teradataml/data/adaboost_example.json +34 -34
  83. teradataml/data/adaboostpredict_example.json +24 -24
  84. teradataml/data/additional_table.csv +10 -10
  85. teradataml/data/admissions_test.csv +21 -21
  86. teradataml/data/admissions_train.csv +41 -41
  87. teradataml/data/admissions_train_nulls.csv +41 -41
  88. teradataml/data/advertising.csv +201 -0
  89. teradataml/data/ageandheight.csv +13 -13
  90. teradataml/data/ageandpressure.csv +31 -31
  91. teradataml/data/antiselect_example.json +36 -36
  92. teradataml/data/antiselect_input.csv +8 -8
  93. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  94. teradataml/data/applicant_external.csv +6 -6
  95. teradataml/data/applicant_reference.csv +6 -6
  96. teradataml/data/arima_example.json +9 -9
  97. teradataml/data/assortedtext_input.csv +8 -8
  98. teradataml/data/attribution_example.json +33 -33
  99. teradataml/data/attribution_sample_table.csv +27 -27
  100. teradataml/data/attribution_sample_table1.csv +6 -6
  101. teradataml/data/attribution_sample_table2.csv +11 -11
  102. teradataml/data/bank_churn.csv +10001 -0
  103. teradataml/data/bank_marketing.csv +11163 -0
  104. teradataml/data/bank_web_clicks1.csv +42 -42
  105. teradataml/data/bank_web_clicks2.csv +91 -91
  106. teradataml/data/bank_web_url.csv +85 -85
  107. teradataml/data/barrier.csv +2 -2
  108. teradataml/data/barrier_new.csv +3 -3
  109. teradataml/data/betweenness_example.json +13 -13
  110. teradataml/data/bike_sharing.csv +732 -0
  111. teradataml/data/bin_breaks.csv +8 -8
  112. teradataml/data/bin_fit_ip.csv +3 -3
  113. teradataml/data/binary_complex_left.csv +11 -11
  114. teradataml/data/binary_complex_right.csv +11 -11
  115. teradataml/data/binary_matrix_complex_left.csv +21 -21
  116. teradataml/data/binary_matrix_complex_right.csv +21 -21
  117. teradataml/data/binary_matrix_real_left.csv +21 -21
  118. teradataml/data/binary_matrix_real_right.csv +21 -21
  119. teradataml/data/blood2ageandweight.csv +26 -26
  120. teradataml/data/bmi.csv +501 -0
  121. teradataml/data/boston.csv +507 -507
  122. teradataml/data/boston2cols.csv +721 -0
  123. teradataml/data/breast_cancer.csv +570 -0
  124. teradataml/data/buoydata_mix.csv +11 -11
  125. teradataml/data/burst_data.csv +5 -5
  126. teradataml/data/burst_example.json +20 -20
  127. teradataml/data/byom_example.json +17 -17
  128. teradataml/data/bytes_table.csv +3 -3
  129. teradataml/data/cal_housing_ex_raw.csv +70 -70
  130. teradataml/data/callers.csv +7 -7
  131. teradataml/data/calls.csv +10 -10
  132. teradataml/data/cars_hist.csv +33 -33
  133. teradataml/data/cat_table.csv +24 -24
  134. teradataml/data/ccm_example.json +31 -31
  135. teradataml/data/ccm_input.csv +91 -91
  136. teradataml/data/ccm_input2.csv +13 -13
  137. teradataml/data/ccmexample.csv +101 -101
  138. teradataml/data/ccmprepare_example.json +8 -8
  139. teradataml/data/ccmprepare_input.csv +91 -91
  140. teradataml/data/cfilter_example.json +12 -12
  141. teradataml/data/changepointdetection_example.json +18 -18
  142. teradataml/data/changepointdetectionrt_example.json +8 -8
  143. teradataml/data/chi_sq.csv +2 -2
  144. teradataml/data/churn_data.csv +14 -14
  145. teradataml/data/churn_emission.csv +35 -35
  146. teradataml/data/churn_initial.csv +3 -3
  147. teradataml/data/churn_state_transition.csv +5 -5
  148. teradataml/data/citedges_2.csv +745 -745
  149. teradataml/data/citvertices_2.csv +1210 -1210
  150. teradataml/data/clicks2.csv +16 -16
  151. teradataml/data/clickstream.csv +12 -12
  152. teradataml/data/clickstream1.csv +11 -11
  153. teradataml/data/closeness_example.json +15 -15
  154. teradataml/data/complaints.csv +21 -21
  155. teradataml/data/complaints_mini.csv +3 -3
  156. teradataml/data/complaints_testtoken.csv +224 -224
  157. teradataml/data/complaints_tokens_test.csv +353 -353
  158. teradataml/data/complaints_traintoken.csv +472 -472
  159. teradataml/data/computers_category.csv +1001 -1001
  160. teradataml/data/computers_test1.csv +1252 -1252
  161. teradataml/data/computers_train1.csv +5009 -5009
  162. teradataml/data/computers_train1_clustered.csv +5009 -5009
  163. teradataml/data/confusionmatrix_example.json +9 -9
  164. teradataml/data/conversion_event_table.csv +3 -3
  165. teradataml/data/corr_input.csv +17 -17
  166. teradataml/data/correlation_example.json +11 -11
  167. teradataml/data/coxhazardratio_example.json +39 -39
  168. teradataml/data/coxph_example.json +15 -15
  169. teradataml/data/coxsurvival_example.json +28 -28
  170. teradataml/data/cpt.csv +41 -41
  171. teradataml/data/credit_ex_merged.csv +45 -45
  172. teradataml/data/customer_loyalty.csv +301 -301
  173. teradataml/data/customer_loyalty_newseq.csv +31 -31
  174. teradataml/data/customer_segmentation_test.csv +2628 -0
  175. teradataml/data/customer_segmentation_train.csv +8069 -0
  176. teradataml/data/dataframe_example.json +146 -146
  177. teradataml/data/decisionforest_example.json +37 -37
  178. teradataml/data/decisionforestpredict_example.json +38 -38
  179. teradataml/data/decisiontree_example.json +21 -21
  180. teradataml/data/decisiontreepredict_example.json +45 -45
  181. teradataml/data/dfft2_size4_real.csv +17 -17
  182. teradataml/data/dfft2_test_matrix16.csv +17 -17
  183. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  184. teradataml/data/diabetes.csv +443 -443
  185. teradataml/data/diabetes_test.csv +89 -89
  186. teradataml/data/dict_table.csv +5 -5
  187. teradataml/data/docperterm_table.csv +4 -4
  188. teradataml/data/docs/__init__.py +1 -1
  189. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  190. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  191. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  192. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  193. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  194. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  195. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  196. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  197. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  198. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  199. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  200. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  201. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  202. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  203. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  204. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  205. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  206. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  207. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  208. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  209. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  210. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  211. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  212. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  213. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  214. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  215. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  216. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  217. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
  218. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
  219. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  220. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
  221. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  222. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  223. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  224. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  225. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  226. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  227. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  228. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  229. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  230. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  231. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  232. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  233. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  234. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  235. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  236. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  237. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  238. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  239. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  240. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  241. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  242. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
  243. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  244. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  245. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  246. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  247. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  248. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  249. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  250. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  251. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
  252. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  253. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  254. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  255. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  256. teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
  257. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  258. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  259. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
  260. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  261. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  262. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  263. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
  264. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  265. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  266. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  267. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
  268. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  269. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  270. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  271. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  272. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  273. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  274. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  275. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  276. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
  277. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  278. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  279. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  280. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  281. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
  282. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
  283. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  284. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  285. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  286. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  287. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  288. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  289. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  290. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  291. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
  292. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  293. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  294. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  295. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  296. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  297. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  298. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  299. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  300. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
  301. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  302. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
  303. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
  304. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  305. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  306. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  307. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  308. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  309. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  310. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  311. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  312. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
  313. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  314. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  315. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  316. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  317. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  318. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  319. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  320. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  321. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  322. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  323. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  324. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
  325. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
  326. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
  327. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  328. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  329. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  330. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  331. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  332. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  333. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  334. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  335. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  336. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  337. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  338. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  339. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  340. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  341. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  342. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  343. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  344. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  345. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  346. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  347. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  348. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  349. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  350. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  351. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  352. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  353. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  354. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  355. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  356. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  357. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  358. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  359. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  360. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  361. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  362. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  363. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  364. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  365. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  366. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  367. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  368. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  369. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  370. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  371. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  372. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  373. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  374. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  375. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  376. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  377. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  378. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  379. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  380. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  381. teradataml/data/dtw_example.json +17 -17
  382. teradataml/data/dtw_t1.csv +11 -11
  383. teradataml/data/dtw_t2.csv +4 -4
  384. teradataml/data/dwt2d_example.json +15 -15
  385. teradataml/data/dwt_example.json +14 -14
  386. teradataml/data/dwt_filter_dim.csv +5 -5
  387. teradataml/data/emission.csv +9 -9
  388. teradataml/data/emp_table_by_dept.csv +19 -19
  389. teradataml/data/employee_info.csv +4 -4
  390. teradataml/data/employee_table.csv +6 -6
  391. teradataml/data/excluding_event_table.csv +2 -2
  392. teradataml/data/finance_data.csv +6 -6
  393. teradataml/data/finance_data2.csv +61 -61
  394. teradataml/data/finance_data3.csv +93 -93
  395. teradataml/data/fish.csv +160 -0
  396. teradataml/data/fm_blood2ageandweight.csv +26 -26
  397. teradataml/data/fmeasure_example.json +11 -11
  398. teradataml/data/followers_leaders.csv +10 -10
  399. teradataml/data/fpgrowth_example.json +12 -12
  400. teradataml/data/frequentpaths_example.json +29 -29
  401. teradataml/data/friends.csv +9 -9
  402. teradataml/data/fs_input.csv +33 -33
  403. teradataml/data/fs_input1.csv +33 -33
  404. teradataml/data/genData.csv +513 -513
  405. teradataml/data/geodataframe_example.json +39 -39
  406. teradataml/data/glass_types.csv +215 -0
  407. teradataml/data/glm_admissions_model.csv +12 -12
  408. teradataml/data/glm_example.json +56 -29
  409. teradataml/data/glml1l2_example.json +28 -28
  410. teradataml/data/glml1l2predict_example.json +54 -54
  411. teradataml/data/glmpredict_example.json +54 -54
  412. teradataml/data/gq_t1.csv +21 -21
  413. teradataml/data/hconvolve_complex_right.csv +5 -5
  414. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  415. teradataml/data/histogram_example.json +11 -11
  416. teradataml/data/hmmdecoder_example.json +78 -78
  417. teradataml/data/hmmevaluator_example.json +24 -24
  418. teradataml/data/hmmsupervised_example.json +10 -10
  419. teradataml/data/hmmunsupervised_example.json +7 -7
  420. teradataml/data/house_values.csv +12 -12
  421. teradataml/data/house_values2.csv +13 -13
  422. teradataml/data/housing_cat.csv +7 -7
  423. teradataml/data/housing_data.csv +9 -9
  424. teradataml/data/housing_test.csv +47 -47
  425. teradataml/data/housing_test_binary.csv +47 -47
  426. teradataml/data/housing_train.csv +493 -493
  427. teradataml/data/housing_train_attribute.csv +4 -4
  428. teradataml/data/housing_train_binary.csv +437 -437
  429. teradataml/data/housing_train_parameter.csv +2 -2
  430. teradataml/data/housing_train_response.csv +493 -493
  431. teradataml/data/housing_train_segment.csv +201 -0
  432. teradataml/data/ibm_stock.csv +370 -370
  433. teradataml/data/ibm_stock1.csv +370 -370
  434. teradataml/data/identitymatch_example.json +21 -21
  435. teradataml/data/idf_table.csv +4 -4
  436. teradataml/data/impressions.csv +101 -101
  437. teradataml/data/inflation.csv +21 -21
  438. teradataml/data/initial.csv +3 -3
  439. teradataml/data/insect2Cols.csv +61 -0
  440. teradataml/data/insect_sprays.csv +12 -12
  441. teradataml/data/insurance.csv +1339 -1339
  442. teradataml/data/interpolator_example.json +12 -12
  443. teradataml/data/iris_altinput.csv +481 -481
  444. teradataml/data/iris_attribute_output.csv +8 -8
  445. teradataml/data/iris_attribute_test.csv +121 -121
  446. teradataml/data/iris_attribute_train.csv +481 -481
  447. teradataml/data/iris_category_expect_predict.csv +31 -31
  448. teradataml/data/iris_data.csv +151 -0
  449. teradataml/data/iris_input.csv +151 -151
  450. teradataml/data/iris_response_train.csv +121 -121
  451. teradataml/data/iris_test.csv +31 -31
  452. teradataml/data/iris_train.csv +121 -121
  453. teradataml/data/join_table1.csv +4 -4
  454. teradataml/data/join_table2.csv +4 -4
  455. teradataml/data/jsons/anly_function_name.json +6 -6
  456. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  457. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  458. teradataml/data/jsons/byom/h2opredict.json +194 -194
  459. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  460. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  461. teradataml/data/jsons/paired_functions.json +435 -435
  462. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  463. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  464. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  465. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  466. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  467. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  468. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  469. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  470. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  471. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  472. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  473. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  474. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  475. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  476. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  477. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  478. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  479. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  480. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  481. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  482. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  483. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  484. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  485. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  486. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  487. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  488. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  489. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  490. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  491. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  492. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  493. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  494. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  495. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  496. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  497. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  498. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  499. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  500. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  501. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  502. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  503. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  504. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  505. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  506. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  507. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  508. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  509. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  510. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  511. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  512. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  513. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  514. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  515. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  516. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  517. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  518. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  519. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  520. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  521. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  522. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  523. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  524. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  525. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  526. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  527. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  528. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  529. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  531. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  532. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  533. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  534. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  535. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  536. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  537. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  539. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  540. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  541. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  542. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  543. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  544. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  545. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  546. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  547. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  548. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  549. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  550. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  551. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  552. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  553. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  554. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  555. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  556. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  557. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  558. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  559. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  560. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  561. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  562. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  563. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  564. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  565. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  566. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  567. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  568. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
  569. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  570. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  571. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  572. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  573. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  574. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  575. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  576. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  577. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  578. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  579. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
  580. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  581. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  582. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  583. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
  584. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
  585. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  586. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  587. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
  588. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  589. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  590. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  591. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
  592. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  593. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  594. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  595. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
  596. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  597. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  598. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  599. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  600. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  601. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  602. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  603. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  604. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  605. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  606. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  607. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  608. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  609. teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
  610. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  611. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  612. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  613. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  614. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  615. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  616. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  617. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  618. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
  619. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
  620. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
  621. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  622. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  623. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  624. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  625. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  626. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  627. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  628. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  629. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  630. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  631. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  632. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  633. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  634. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  635. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
  636. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
  637. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
  638. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  639. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  640. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  641. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  642. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  643. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  644. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  645. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  646. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  647. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  648. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  649. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  650. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  651. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  653. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  654. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  655. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  656. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  657. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  658. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  659. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  660. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  661. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  662. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  663. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  664. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  665. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  666. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  667. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  668. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  669. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  670. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  671. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  672. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  673. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  674. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  675. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  676. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  677. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  678. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  679. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  680. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  681. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  682. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  683. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  684. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  685. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  686. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  687. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  688. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  689. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  690. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  691. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  692. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  693. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  694. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  695. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  696. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  697. teradataml/data/kmeans_example.json +22 -17
  698. teradataml/data/kmeans_table.csv +10 -0
  699. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  700. teradataml/data/knn_example.json +18 -18
  701. teradataml/data/knnrecommender_example.json +6 -6
  702. teradataml/data/knnrecommenderpredict_example.json +12 -12
  703. teradataml/data/lar_example.json +17 -17
  704. teradataml/data/larpredict_example.json +30 -30
  705. teradataml/data/lc_new_predictors.csv +5 -5
  706. teradataml/data/lc_new_reference.csv +9 -9
  707. teradataml/data/lda_example.json +8 -8
  708. teradataml/data/ldainference_example.json +14 -14
  709. teradataml/data/ldatopicsummary_example.json +8 -8
  710. teradataml/data/levendist_input.csv +13 -13
  711. teradataml/data/levenshteindistance_example.json +10 -10
  712. teradataml/data/linreg_example.json +9 -9
  713. teradataml/data/load_example_data.py +326 -323
  714. teradataml/data/loan_prediction.csv +295 -295
  715. teradataml/data/lungcancer.csv +138 -138
  716. teradataml/data/mappingdata.csv +12 -12
  717. teradataml/data/milk_timeseries.csv +157 -157
  718. teradataml/data/min_max_titanic.csv +4 -4
  719. teradataml/data/minhash_example.json +6 -6
  720. teradataml/data/ml_ratings.csv +7547 -7547
  721. teradataml/data/ml_ratings_10.csv +2445 -2445
  722. teradataml/data/model1_table.csv +5 -5
  723. teradataml/data/model2_table.csv +5 -5
  724. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  725. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  726. teradataml/data/modularity_example.json +12 -12
  727. teradataml/data/movavg_example.json +7 -7
  728. teradataml/data/mtx1.csv +7 -7
  729. teradataml/data/mtx2.csv +13 -13
  730. teradataml/data/multi_model_classification.csv +401 -0
  731. teradataml/data/multi_model_regression.csv +401 -0
  732. teradataml/data/mvdfft8.csv +9 -9
  733. teradataml/data/naivebayes_example.json +9 -9
  734. teradataml/data/naivebayespredict_example.json +19 -19
  735. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  736. teradataml/data/naivebayestextclassifier_example.json +8 -8
  737. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  738. teradataml/data/name_Find_configure.csv +10 -10
  739. teradataml/data/namedentityfinder_example.json +14 -14
  740. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  741. teradataml/data/namedentityfindertrainer_example.json +6 -6
  742. teradataml/data/nb_iris_input_test.csv +31 -31
  743. teradataml/data/nb_iris_input_train.csv +121 -121
  744. teradataml/data/nbp_iris_model.csv +13 -13
  745. teradataml/data/ner_extractor_text.csv +2 -2
  746. teradataml/data/ner_sports_test2.csv +29 -29
  747. teradataml/data/ner_sports_train.csv +501 -501
  748. teradataml/data/nerevaluator_example.json +5 -5
  749. teradataml/data/nerextractor_example.json +18 -18
  750. teradataml/data/nermem_sports_test.csv +17 -17
  751. teradataml/data/nermem_sports_train.csv +50 -50
  752. teradataml/data/nertrainer_example.json +6 -6
  753. teradataml/data/ngrams_example.json +6 -6
  754. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  755. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  756. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  757. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  758. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  759. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  760. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  761. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  762. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  763. teradataml/data/npath_example.json +23 -23
  764. teradataml/data/ntree_example.json +14 -14
  765. teradataml/data/numeric_strings.csv +4 -4
  766. teradataml/data/numerics.csv +4 -4
  767. teradataml/data/ocean_buoy.csv +17 -17
  768. teradataml/data/ocean_buoy2.csv +17 -17
  769. teradataml/data/ocean_buoys.csv +27 -27
  770. teradataml/data/ocean_buoys2.csv +10 -10
  771. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  772. teradataml/data/ocean_buoys_seq.csv +29 -29
  773. teradataml/data/onehot_encoder_train.csv +4 -0
  774. teradataml/data/openml_example.json +92 -0
  775. teradataml/data/optional_event_table.csv +4 -4
  776. teradataml/data/orders1.csv +11 -11
  777. teradataml/data/orders1_12.csv +12 -12
  778. teradataml/data/orders_ex.csv +4 -4
  779. teradataml/data/pack_example.json +8 -8
  780. teradataml/data/package_tracking.csv +19 -19
  781. teradataml/data/package_tracking_pti.csv +18 -18
  782. teradataml/data/pagerank_example.json +13 -13
  783. teradataml/data/paragraphs_input.csv +6 -6
  784. teradataml/data/pathanalyzer_example.json +7 -7
  785. teradataml/data/pathgenerator_example.json +7 -7
  786. teradataml/data/phrases.csv +7 -7
  787. teradataml/data/pivot_example.json +8 -8
  788. teradataml/data/pivot_input.csv +22 -22
  789. teradataml/data/playerRating.csv +31 -31
  790. teradataml/data/postagger_example.json +6 -6
  791. teradataml/data/posttagger_output.csv +44 -44
  792. teradataml/data/production_data.csv +16 -16
  793. teradataml/data/production_data2.csv +7 -7
  794. teradataml/data/randomsample_example.json +31 -31
  795. teradataml/data/randomwalksample_example.json +8 -8
  796. teradataml/data/rank_table.csv +6 -6
  797. teradataml/data/ref_mobile_data.csv +4 -4
  798. teradataml/data/ref_mobile_data_dense.csv +2 -2
  799. teradataml/data/ref_url.csv +17 -17
  800. teradataml/data/restaurant_reviews.csv +7 -7
  801. teradataml/data/river_data.csv +145 -145
  802. teradataml/data/roc_example.json +7 -7
  803. teradataml/data/roc_input.csv +101 -101
  804. teradataml/data/rule_inputs.csv +6 -6
  805. teradataml/data/rule_table.csv +2 -2
  806. teradataml/data/sales.csv +7 -7
  807. teradataml/data/sales_transaction.csv +501 -501
  808. teradataml/data/salesdata.csv +342 -342
  809. teradataml/data/sample_cities.csv +2 -2
  810. teradataml/data/sample_shapes.csv +10 -10
  811. teradataml/data/sample_streets.csv +2 -2
  812. teradataml/data/sampling_example.json +15 -15
  813. teradataml/data/sax_example.json +8 -8
  814. teradataml/data/scale_attributes.csv +3 -0
  815. teradataml/data/scale_example.json +74 -23
  816. teradataml/data/scale_housing.csv +11 -11
  817. teradataml/data/scale_housing_test.csv +6 -6
  818. teradataml/data/scale_input_part_sparse.csv +31 -0
  819. teradataml/data/scale_input_partitioned.csv +16 -0
  820. teradataml/data/scale_input_sparse.csv +11 -0
  821. teradataml/data/scale_parameters.csv +3 -0
  822. teradataml/data/scale_stat.csv +11 -11
  823. teradataml/data/scalebypartition_example.json +13 -13
  824. teradataml/data/scalemap_example.json +13 -13
  825. teradataml/data/scalesummary_example.json +12 -12
  826. teradataml/data/score_category.csv +101 -101
  827. teradataml/data/score_summary.csv +4 -4
  828. teradataml/data/script_example.json +9 -9
  829. teradataml/data/scripts/deploy_script.py +84 -0
  830. teradataml/data/scripts/mapper.R +20 -0
  831. teradataml/data/scripts/mapper.py +15 -15
  832. teradataml/data/scripts/mapper_replace.py +15 -15
  833. teradataml/data/scripts/sklearn/__init__.py +0 -0
  834. teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
  835. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
  836. teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
  837. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
  838. teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
  839. teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
  840. teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
  841. teradataml/data/seeds.csv +10 -10
  842. teradataml/data/sentenceextractor_example.json +6 -6
  843. teradataml/data/sentiment_extract_input.csv +11 -11
  844. teradataml/data/sentiment_train.csv +16 -16
  845. teradataml/data/sentiment_word.csv +20 -20
  846. teradataml/data/sentiment_word_input.csv +19 -19
  847. teradataml/data/sentimentextractor_example.json +24 -24
  848. teradataml/data/sentimenttrainer_example.json +8 -8
  849. teradataml/data/sequence_table.csv +10 -10
  850. teradataml/data/seriessplitter_example.json +7 -7
  851. teradataml/data/sessionize_example.json +17 -17
  852. teradataml/data/sessionize_table.csv +116 -116
  853. teradataml/data/setop_test1.csv +24 -24
  854. teradataml/data/setop_test2.csv +22 -22
  855. teradataml/data/soc_nw_edges.csv +10 -10
  856. teradataml/data/soc_nw_vertices.csv +7 -7
  857. teradataml/data/souvenir_timeseries.csv +167 -167
  858. teradataml/data/sparse_iris_attribute.csv +5 -5
  859. teradataml/data/sparse_iris_test.csv +121 -121
  860. teradataml/data/sparse_iris_train.csv +601 -601
  861. teradataml/data/star1.csv +6 -6
  862. teradataml/data/state_transition.csv +5 -5
  863. teradataml/data/stock_data.csv +53 -53
  864. teradataml/data/stock_movement.csv +11 -11
  865. teradataml/data/stock_vol.csv +76 -76
  866. teradataml/data/stop_words.csv +8 -8
  867. teradataml/data/store_sales.csv +37 -37
  868. teradataml/data/stringsimilarity_example.json +7 -7
  869. teradataml/data/strsimilarity_input.csv +13 -13
  870. teradataml/data/students.csv +101 -101
  871. teradataml/data/svm_iris_input_test.csv +121 -121
  872. teradataml/data/svm_iris_input_train.csv +481 -481
  873. teradataml/data/svm_iris_model.csv +7 -7
  874. teradataml/data/svmdense_example.json +9 -9
  875. teradataml/data/svmdensepredict_example.json +18 -18
  876. teradataml/data/svmsparse_example.json +7 -7
  877. teradataml/data/svmsparsepredict_example.json +13 -13
  878. teradataml/data/svmsparsesummary_example.json +7 -7
  879. teradataml/data/target_mobile_data.csv +13 -13
  880. teradataml/data/target_mobile_data_dense.csv +5 -5
  881. teradataml/data/templatedata.csv +1201 -1201
  882. teradataml/data/templates/open_source_ml.json +9 -0
  883. teradataml/data/teradataml_example.json +150 -1
  884. teradataml/data/test_classification.csv +101 -0
  885. teradataml/data/test_loan_prediction.csv +53 -53
  886. teradataml/data/test_pacf_12.csv +37 -37
  887. teradataml/data/test_prediction.csv +101 -0
  888. teradataml/data/test_regression.csv +101 -0
  889. teradataml/data/test_river2.csv +109 -109
  890. teradataml/data/text_inputs.csv +6 -6
  891. teradataml/data/textchunker_example.json +7 -7
  892. teradataml/data/textclassifier_example.json +6 -6
  893. teradataml/data/textclassifier_input.csv +7 -7
  894. teradataml/data/textclassifiertrainer_example.json +6 -6
  895. teradataml/data/textmorph_example.json +5 -5
  896. teradataml/data/textparser_example.json +15 -15
  897. teradataml/data/texttagger_example.json +11 -11
  898. teradataml/data/texttokenizer_example.json +6 -6
  899. teradataml/data/texttrainer_input.csv +11 -11
  900. teradataml/data/tf_example.json +6 -6
  901. teradataml/data/tfidf_example.json +13 -13
  902. teradataml/data/tfidf_input1.csv +201 -201
  903. teradataml/data/tfidf_train.csv +6 -6
  904. teradataml/data/time_table1.csv +535 -535
  905. teradataml/data/time_table2.csv +14 -14
  906. teradataml/data/timeseriesdata.csv +1601 -1601
  907. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  908. teradataml/data/titanic.csv +892 -892
  909. teradataml/data/token_table.csv +696 -696
  910. teradataml/data/train_multiclass.csv +101 -0
  911. teradataml/data/train_regression.csv +101 -0
  912. teradataml/data/train_regression_multiple_labels.csv +101 -0
  913. teradataml/data/train_tracking.csv +27 -27
  914. teradataml/data/transformation_table.csv +5 -5
  915. teradataml/data/transformation_table_new.csv +1 -1
  916. teradataml/data/tv_spots.csv +16 -16
  917. teradataml/data/twod_climate_data.csv +117 -117
  918. teradataml/data/uaf_example.json +475 -475
  919. teradataml/data/univariatestatistics_example.json +8 -8
  920. teradataml/data/unpack_example.json +9 -9
  921. teradataml/data/unpivot_example.json +9 -9
  922. teradataml/data/unpivot_input.csv +8 -8
  923. teradataml/data/us_air_pass.csv +36 -36
  924. teradataml/data/us_population.csv +624 -624
  925. teradataml/data/us_states_shapes.csv +52 -52
  926. teradataml/data/varmax_example.json +17 -17
  927. teradataml/data/vectordistance_example.json +25 -25
  928. teradataml/data/ville_climatedata.csv +121 -121
  929. teradataml/data/ville_tempdata.csv +12 -12
  930. teradataml/data/ville_tempdata1.csv +12 -12
  931. teradataml/data/ville_temperature.csv +11 -11
  932. teradataml/data/waveletTable.csv +1605 -1605
  933. teradataml/data/waveletTable2.csv +1605 -1605
  934. teradataml/data/weightedmovavg_example.json +8 -8
  935. teradataml/data/wft_testing.csv +5 -5
  936. teradataml/data/wine_data.csv +1600 -0
  937. teradataml/data/word_embed_input_table1.csv +5 -5
  938. teradataml/data/word_embed_input_table2.csv +4 -4
  939. teradataml/data/word_embed_model.csv +22 -22
  940. teradataml/data/words_input.csv +13 -13
  941. teradataml/data/xconvolve_complex_left.csv +6 -6
  942. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  943. teradataml/data/xgboost_example.json +35 -35
  944. teradataml/data/xgboostpredict_example.json +31 -31
  945. teradataml/data/ztest_example.json +16 -0
  946. teradataml/dataframe/copy_to.py +1769 -1698
  947. teradataml/dataframe/data_transfer.py +2812 -2745
  948. teradataml/dataframe/dataframe.py +17630 -16946
  949. teradataml/dataframe/dataframe_utils.py +1875 -1740
  950. teradataml/dataframe/fastload.py +794 -603
  951. teradataml/dataframe/indexer.py +424 -424
  952. teradataml/dataframe/setop.py +1179 -1166
  953. teradataml/dataframe/sql.py +10174 -6432
  954. teradataml/dataframe/sql_function_parameters.py +439 -388
  955. teradataml/dataframe/sql_functions.py +652 -652
  956. teradataml/dataframe/sql_interfaces.py +220 -220
  957. teradataml/dataframe/vantage_function_types.py +674 -630
  958. teradataml/dataframe/window.py +693 -692
  959. teradataml/dbutils/__init__.py +3 -3
  960. teradataml/dbutils/dbutils.py +1167 -1150
  961. teradataml/dbutils/filemgr.py +267 -267
  962. teradataml/gen_ai/__init__.py +2 -2
  963. teradataml/gen_ai/convAI.py +472 -472
  964. teradataml/geospatial/__init__.py +3 -3
  965. teradataml/geospatial/geodataframe.py +1105 -1094
  966. teradataml/geospatial/geodataframecolumn.py +392 -387
  967. teradataml/geospatial/geometry_types.py +925 -925
  968. teradataml/hyperparameter_tuner/__init__.py +1 -1
  969. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  970. teradataml/hyperparameter_tuner/utils.py +281 -187
  971. teradataml/lib/aed_0_1.dll +0 -0
  972. teradataml/lib/libaed_0_1.dylib +0 -0
  973. teradataml/lib/libaed_0_1.so +0 -0
  974. teradataml/libaed_0_1.dylib +0 -0
  975. teradataml/libaed_0_1.so +0 -0
  976. teradataml/opensource/__init__.py +1 -0
  977. teradataml/opensource/sklearn/__init__.py +1 -0
  978. teradataml/opensource/sklearn/_class.py +255 -0
  979. teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
  980. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  981. teradataml/opensource/sklearn/constants.py +54 -0
  982. teradataml/options/__init__.py +130 -124
  983. teradataml/options/configure.py +358 -336
  984. teradataml/options/display.py +176 -176
  985. teradataml/plot/__init__.py +2 -2
  986. teradataml/plot/axis.py +1388 -1388
  987. teradataml/plot/constants.py +15 -15
  988. teradataml/plot/figure.py +398 -398
  989. teradataml/plot/plot.py +760 -760
  990. teradataml/plot/query_generator.py +83 -83
  991. teradataml/plot/subplot.py +216 -216
  992. teradataml/scriptmgmt/UserEnv.py +3791 -3761
  993. teradataml/scriptmgmt/__init__.py +3 -3
  994. teradataml/scriptmgmt/lls_utils.py +1719 -1604
  995. teradataml/series/series.py +532 -532
  996. teradataml/series/series_utils.py +71 -71
  997. teradataml/table_operators/Apply.py +949 -917
  998. teradataml/table_operators/Script.py +1718 -1982
  999. teradataml/table_operators/TableOperator.py +1255 -1616
  1000. teradataml/table_operators/__init__.py +2 -3
  1001. teradataml/table_operators/apply_query_generator.py +262 -262
  1002. teradataml/table_operators/query_generator.py +507 -507
  1003. teradataml/table_operators/table_operator_query_generator.py +460 -460
  1004. teradataml/table_operators/table_operator_util.py +631 -639
  1005. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  1006. teradataml/table_operators/templates/dataframe_map.template +176 -176
  1007. teradataml/table_operators/templates/script_executor.template +170 -170
  1008. teradataml/utils/dtypes.py +684 -684
  1009. teradataml/utils/internal_buffer.py +84 -84
  1010. teradataml/utils/print_versions.py +205 -205
  1011. teradataml/utils/utils.py +410 -410
  1012. teradataml/utils/validators.py +2277 -2115
  1013. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
  1014. teradataml-20.0.0.1.dist-info/RECORD +1056 -0
  1015. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
  1016. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
  1017. teradataml/analytics/mle/AdaBoost.py +0 -651
  1018. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1019. teradataml/analytics/mle/Antiselect.py +0 -342
  1020. teradataml/analytics/mle/Arima.py +0 -641
  1021. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1022. teradataml/analytics/mle/Attribution.py +0 -1070
  1023. teradataml/analytics/mle/Betweenness.py +0 -658
  1024. teradataml/analytics/mle/Burst.py +0 -711
  1025. teradataml/analytics/mle/CCM.py +0 -600
  1026. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1027. teradataml/analytics/mle/CFilter.py +0 -460
  1028. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1029. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1030. teradataml/analytics/mle/Closeness.py +0 -737
  1031. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1032. teradataml/analytics/mle/Correlation.py +0 -477
  1033. teradataml/analytics/mle/Correlation2.py +0 -573
  1034. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1035. teradataml/analytics/mle/CoxPH.py +0 -556
  1036. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1037. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1038. teradataml/analytics/mle/DTW.py +0 -623
  1039. teradataml/analytics/mle/DWT.py +0 -564
  1040. teradataml/analytics/mle/DWT2D.py +0 -599
  1041. teradataml/analytics/mle/DecisionForest.py +0 -716
  1042. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1043. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1044. teradataml/analytics/mle/DecisionTree.py +0 -830
  1045. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1046. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1047. teradataml/analytics/mle/FMeasure.py +0 -402
  1048. teradataml/analytics/mle/FPGrowth.py +0 -734
  1049. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1050. teradataml/analytics/mle/GLM.py +0 -558
  1051. teradataml/analytics/mle/GLML1L2.py +0 -547
  1052. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1053. teradataml/analytics/mle/GLMPredict.py +0 -529
  1054. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1055. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1056. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1057. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1058. teradataml/analytics/mle/Histogram.py +0 -561
  1059. teradataml/analytics/mle/IDWT.py +0 -476
  1060. teradataml/analytics/mle/IDWT2D.py +0 -493
  1061. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1062. teradataml/analytics/mle/Interpolator.py +0 -918
  1063. teradataml/analytics/mle/KMeans.py +0 -485
  1064. teradataml/analytics/mle/KNN.py +0 -627
  1065. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1066. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1067. teradataml/analytics/mle/LAR.py +0 -439
  1068. teradataml/analytics/mle/LARPredict.py +0 -478
  1069. teradataml/analytics/mle/LDA.py +0 -548
  1070. teradataml/analytics/mle/LDAInference.py +0 -492
  1071. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1072. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1073. teradataml/analytics/mle/LinReg.py +0 -433
  1074. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1075. teradataml/analytics/mle/MinHash.py +0 -544
  1076. teradataml/analytics/mle/Modularity.py +0 -587
  1077. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1078. teradataml/analytics/mle/NERExtractor.py +0 -595
  1079. teradataml/analytics/mle/NERTrainer.py +0 -458
  1080. teradataml/analytics/mle/NGrams.py +0 -570
  1081. teradataml/analytics/mle/NPath.py +0 -634
  1082. teradataml/analytics/mle/NTree.py +0 -549
  1083. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1084. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1085. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1086. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1087. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1088. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1089. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1090. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1091. teradataml/analytics/mle/POSTagger.py +0 -417
  1092. teradataml/analytics/mle/Pack.py +0 -411
  1093. teradataml/analytics/mle/PageRank.py +0 -535
  1094. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1095. teradataml/analytics/mle/PathGenerator.py +0 -367
  1096. teradataml/analytics/mle/PathStart.py +0 -464
  1097. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1098. teradataml/analytics/mle/Pivot.py +0 -471
  1099. teradataml/analytics/mle/ROC.py +0 -425
  1100. teradataml/analytics/mle/RandomSample.py +0 -637
  1101. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1102. teradataml/analytics/mle/SAX.py +0 -779
  1103. teradataml/analytics/mle/SVMDense.py +0 -677
  1104. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1105. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1106. teradataml/analytics/mle/SVMSparse.py +0 -557
  1107. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1108. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1109. teradataml/analytics/mle/Sampling.py +0 -549
  1110. teradataml/analytics/mle/Scale.py +0 -565
  1111. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1112. teradataml/analytics/mle/ScaleMap.py +0 -378
  1113. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1114. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1115. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1116. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1117. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1118. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1119. teradataml/analytics/mle/Sessionize.py +0 -475
  1120. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1121. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1122. teradataml/analytics/mle/TF.py +0 -389
  1123. teradataml/analytics/mle/TFIDF.py +0 -504
  1124. teradataml/analytics/mle/TextChunker.py +0 -414
  1125. teradataml/analytics/mle/TextClassifier.py +0 -399
  1126. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1127. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1128. teradataml/analytics/mle/TextMorph.py +0 -494
  1129. teradataml/analytics/mle/TextParser.py +0 -623
  1130. teradataml/analytics/mle/TextTagger.py +0 -530
  1131. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1132. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1133. teradataml/analytics/mle/Unpack.py +0 -526
  1134. teradataml/analytics/mle/Unpivot.py +0 -438
  1135. teradataml/analytics/mle/VarMax.py +0 -776
  1136. teradataml/analytics/mle/VectorDistance.py +0 -762
  1137. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1138. teradataml/analytics/mle/XGBoost.py +0 -842
  1139. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1140. teradataml/analytics/mle/__init__.py +0 -123
  1141. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1142. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1143. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1144. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1145. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1146. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1147. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1148. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1149. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1150. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1151. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1152. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1153. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1154. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1155. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1156. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1157. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1158. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1159. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1160. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1161. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1162. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1163. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1164. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1165. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1166. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1167. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1168. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1169. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1170. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1171. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1172. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1173. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1174. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1175. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1176. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1177. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1178. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1179. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1180. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1181. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1182. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1183. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1184. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1185. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1186. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1187. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1188. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1189. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1190. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1191. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1192. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1193. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1194. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1195. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1196. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1197. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1198. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1199. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1200. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1201. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1202. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1203. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1204. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1205. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1206. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1207. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1208. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1209. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1210. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1211. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1212. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1213. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1214. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1215. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1216. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1217. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1218. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1219. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1220. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1221. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1222. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1223. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1224. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1225. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1226. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1227. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1228. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1229. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1230. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1231. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1232. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1233. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1234. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1235. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1236. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1237. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1238. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1239. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1240. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1241. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1242. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1243. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1244. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1245. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1246. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1247. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1248. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1249. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1250. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1251. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1252. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1253. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1254. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1255. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1256. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1257. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1258. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1259. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1260. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1261. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1262. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1263. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1264. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1265. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1266. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1267. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1268. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1269. teradataml/analytics/sqle/Antiselect.py +0 -321
  1270. teradataml/analytics/sqle/Attribution.py +0 -603
  1271. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1272. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1273. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1274. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1275. teradataml/analytics/sqle/NPath.py +0 -632
  1276. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1277. teradataml/analytics/sqle/Pack.py +0 -388
  1278. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1279. teradataml/analytics/sqle/Sessionize.py +0 -390
  1280. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1281. teradataml/analytics/sqle/Unpack.py +0 -503
  1282. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1283. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1284. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1285. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1286. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1287. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1288. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1289. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1290. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1291. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1292. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1293. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1294. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1295. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1296. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1297. teradataml/catalog/model_cataloging.py +0 -980
  1298. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1299. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1300. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1301. teradataml/table_operators/sandbox_container_util.py +0 -643
  1302. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1303. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
@@ -1,603 +1,794 @@
1
- #!/usr/bin/python
2
- # ##################################################################
3
- #
4
- # Copyright 2019 Teradata. All rights reserved.
5
- # TERADATA CONFIDENTIAL AND TRADE SECRET
6
- #
7
- # Primary Owner: Abhinav Sahu (abhinav.sahu@teradata.com)
8
- # Secondary Owner:
9
- #
10
- # ##################################################################
11
-
12
- import re
13
- import datetime
14
- import warnings
15
- import pandas as pd
16
-
17
- from sqlalchemy import MetaData, Table, Column
18
- from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
19
- from teradataml.dataframe import dataframe
20
- from teradataml.context.context import *
21
- from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
22
- from teradataml.common.constants import TeradataConstants, DriverEscapeFunctions
23
- from teradataml.common.utils import UtilFuncs
24
- from teradataml.common.garbagecollector import GarbageCollector
25
- from teradataml.utils.validators import _Validators
26
- from teradataml.dataframe.copy_to import copy_to_sql, \
27
- _validate_pti_copy_parameters, _create_table_object, \
28
- _create_pti_table_object, _extract_column_info, \
29
- _check_columns_insertion_compatible
30
- from teradataml.dataframe.data_transfer import _DataTransferUtils
31
-
32
-
33
- def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
34
- index_label=None, primary_index=None, types=None, batch_size=None,
35
- save_errors=False, open_sessions=None):
36
- """
37
- The fastload() API writes records from a Pandas DataFrame to Teradata Vantage
38
- using Fastload. FastLoad API can be used to quickly load large amounts of data
39
- in an empty table on Vantage.
40
- 1. Teradata recommends to use this API when number rows in the Pandas DataFrame
41
- is greater than 100,000 to have better performance. To insert lesser rows,
42
- please use copy_to_sql for optimized performance. The data is loaded in batches.
43
- 2. FastLoad API cannot load duplicate rows in the DataFrame if the table is a
44
- MULTISET table having primary index.
45
- 3. FastLoad API does not support all Teradata Advanced SQL Engine data types.
46
- For example, target table having BLOB and CLOB data type columns cannot be
47
- loaded.
48
- 4. If there are any incorrect rows i.e. due to constraint violations, data type
49
- conversion errors, etc., FastLoad protocol ignores those rows and inserts
50
- all valid rows.
51
- 5. Rows in the DataFrame that failed to get inserted are categorized into errors
52
- and warnings by FastLoad protocol and these errors and warnings are stored
53
- into respective error and warning tables by FastLoad API.
54
- 6. If save_errors argument is True, the names of error and warning tables are
55
- shown once the fastload operation is complete. These tables will be persisted
56
- using copy_to_sql.
57
-
58
- For additional information about FastLoad protocol through teradatasql driver,
59
- please refer the FASTLOAD section of https://pypi.org/project/teradatasql/#FastLoad
60
- driver documentation for more information.
61
-
62
- PARAMETERS:
63
- df:
64
- Required Argument.
65
- Specifies the Pandas DataFrame object to be saved in Vantage.
66
- Types: pandas.DataFrame
67
-
68
- table_name:
69
- Required Argument.
70
- Specifies the name of the table to be created in Vantage.
71
- Types: String
72
-
73
- schema_name:
74
- Optional Argument.
75
- Specifies the name of the database schema in Vantage to write to.
76
- Types: String
77
- Default: None (Uses default database schema).
78
-
79
- if_exists:
80
- Optional Argument.
81
- Specifies the action to take when table already exists in Vantage.
82
- Types: String
83
- Possible values: {'fail', 'replace', 'append'}
84
- - fail: If table exists, raise TeradataMlException.
85
- - replace: If table exists, drop it, recreate it, and insert data.
86
- - append: If table exists, insert data. Create if does not exist.
87
- Default: replace
88
-
89
- index:
90
- Optional Argument.
91
- Specifies whether to save Pandas DataFrame index as a column or not.
92
- Types: Boolean (True or False)
93
- Default: False
94
-
95
- index_label:
96
- Optional Argument.
97
- Specifies the column label(s) for Pandas DataFrame index column(s).
98
- Types: String or list of strings
99
- Default: None
100
-
101
- primary_index:
102
- Optional Argument.
103
- Specifies which column(s) to use as primary index while creating table
104
- in Vantage. When set to None, No Primary Index (NoPI) tables are created.
105
- Types: String or list of strings
106
- Default: None
107
- Example:
108
- primary_index = 'my_primary_index'
109
- primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
110
-
111
- types:
112
- Optional Argument.
113
- Specifies the data types for requested columns to be saved in Vantage.
114
- Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
115
- Default: None
116
-
117
- Note:
118
- 1. This argument accepts a dictionary of columns names and their required
119
- teradatasqlalchemy types as key-value pairs, allowing to specify a subset
120
- of the columns of a specific type.
121
- i) When only a subset of all columns are provided, the column types
122
- for the rest are assigned appropriately.
123
- ii) When types argument is not provided, the column types are assigned
124
- as listed in the following table:
125
- +---------------------------+-----------------------------------------+
126
- | Pandas/Numpy Type | teradatasqlalchemy Type |
127
- +---------------------------+-----------------------------------------+
128
- | int32 | INTEGER |
129
- +---------------------------+-----------------------------------------+
130
- | int64 | BIGINT |
131
- +---------------------------+-----------------------------------------+
132
- | bool | BYTEINT |
133
- +---------------------------+-----------------------------------------+
134
- | float32/float64 | FLOAT |
135
- +---------------------------+-----------------------------------------+
136
- | datetime64/datetime64[ns] | TIMESTAMP |
137
- +---------------------------+-----------------------------------------+
138
- | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True) |
139
- +---------------------------+-----------------------------------------+
140
- | Any other data type | VARCHAR(configure.default_varchar_size) |
141
- +---------------------------+-----------------------------------------+
142
- 2. This argument does not have any effect when the table specified using
143
- table_name and schema_name exists and if_exists = 'append'.
144
-
145
- batch_size:
146
- Optional Argument.
147
- Specifies the number of rows to be loaded in a batch. For better performance,
148
- recommended batch size is at least 100,000. batch_size must be a positive integer.
149
- If this argument is None, there are two cases based on the number of
150
- rows, say N in the dataframe 'df' as explained below:
151
- If N is greater than 100,000, the rows are divided into batches of
152
- equal size with each batch having at least 100,000 rows (except the
153
- last batch which might have more rows). If N is less than 100,000, the
154
- rows are inserted in one batch after notifying the user that insertion
155
- happens with degradation of performance.
156
- If this argument is not None, the rows are inserted in batches of size
157
- given in the argument, irrespective of the recommended batch size.
158
- The last batch will have rows less than the batch size specified, if the
159
- number of rows is not an integral multiples of the argument batch_size.
160
- Default Value: None
161
- Types: int
162
-
163
- save_errors:
164
- Optional Argument.
165
- Specifies whether to persist the error/warning information in Vantage
166
- or not. If save_errors is set to False, error/warnings are not persisted
167
- as tables. If argument is set to True, the error and warnings information
168
- are presisted and names of error and warning tables are returned. Otherwise,
169
- the function returns None for the names of the tables.
170
- Default Value: False
171
- Types: bool
172
-
173
- open_sessions:
174
- Optional Argument.
175
- Specifies the number of Teradata data transfer sessions to be opened for fastload operation.
176
- Note : If "open_sessions" argument is not provided, the default value is the smaller of 8 or the
177
- number of AMPs available.
178
- For additional information about number of Teradata data-transfer
179
- sessions opened during fastload, please refer to:
180
- https://pypi.org/project/teradatasql/#FastLoad
181
- Default Value: None
182
- Types: int
183
-
184
- RETURNS:
185
- A dict containing the following attributes:
186
- 1. errors_dataframe: It is a Pandas DataFrame containing error messages
187
- thrown by fastload. DataFrame is empty if there are no errors.
188
- 2. warnings_dataframe: It is a Pandas DataFrame containing warning messages
189
- thrown by fastload. DataFrame is empty if there are no warnings.
190
- 3. errors_table: Name of the table containing errors. It is None, if
191
- argument save_errors is False.
192
- 4. warnings_table: Name of the table containing warnings. It is None, if
193
- argument save_errors is False.
194
-
195
- RAISES:
196
- TeradataMlException
197
-
198
- EXAMPLES:
199
- Saving a Pandas DataFrame using Fastload:
200
- >>> from teradataml.dataframe.fastload import fastload
201
- >>> from teradatasqlalchemy.types import *
202
-
203
- >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
204
- 'emp_sage': [100, 200, 300, 400],
205
- 'emp_id': [133, 144, 155, 177],
206
- 'marks': [99.99, 97.32, 94.67, 91.00]
207
- }
208
-
209
- >>> pandas_df = pd.DataFrame(df)
210
-
211
- # a) Default execution
212
- >>> fastload(df = pandas_df, table_name = 'my_table')
213
-
214
- # b) Save a Pandas DataFrame with primary_index
215
- >>> pandas_df = pandas_df.set_index(['emp_id'])
216
- >>> fastload(df = pandas_df, table_name = 'my_table_1', primary_index='emp_id')
217
-
218
- # c) Save a Pandas DataFrame using fastload() with index and primary_index
219
- >>> fastload(df = pandas_df, table_name = 'my_table_2', index=True,
220
- primary_index='index_label')
221
-
222
- # d) Save a Pandas DataFrame using types, appending to the table if it already exists
223
- >>> fastload(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
224
- index = True, index_label = 'my_index_label',
225
- primary_index = ['emp_id'], if_exists = 'append',
226
- types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
227
- 'emp_id': BIGINT, 'marks': DECIMAL})
228
-
229
- # e) Save a Pandas DataFrame using levels in index of type MultiIndex
230
- >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
231
- >>> fastload(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
232
- index = True, index_label = ['index1', 'index2'],
233
- primary_index = ['index1'], if_exists = 'replace')
234
-
235
- # f) Save a Pandas DataFrame by opening spcified number of teradata data transfer sessions
236
- >>> fastload(df = pandas_df, table_name = 'my_table_5', open_sessions = 2)
237
-
238
- """
239
- # Deriving global connection using get_connection()
240
- con = get_connection()
241
- try:
242
- if con is None:
243
- raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE),
244
- MessageCodes.CONNECTION_FAILURE)
245
-
246
- if isinstance(df, dataframe.DataFrame):
247
- raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
248
- 'df', "Pandas DataFrame"), MessageCodes.UNSUPPORTED_DATATYPE)
249
-
250
- dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name, if_exists=if_exists,
251
- index=index, index_label=index_label, primary_index=primary_index,
252
- types=types, batch_size=batch_size,
253
- save_errors=save_errors, api_name='fastload',
254
- use_fastload=True, open_sessions=open_sessions)
255
- # Validate DataFrame & related flags; Proceed only when True
256
- dt_obj._validate()
257
-
258
- # We have commented out the PTI related code for now as fastload fails to
259
- # load data into PTI tables. Same has been reported to gosql team. We'll
260
- # un-comment this once the issue is fixed.
261
- # Check if the table to be created must be a Primary Time Index (PTI) table.
262
- # If a user specifies the timecode_column parameter, and attempt to create
263
- # a PTI will be made.
264
- # is_pti = False
265
- # if timecode_column is not None:
266
- # is_pti = True
267
- # if primary_index is not None:
268
- # warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
269
- # 'primary_index',
270
- # 'timecode_column',
271
- # 'specified'))
272
- # else:
273
- # ignored = []
274
- # if timezero_date is not None: ignored.append('timezero_date')
275
- # if timebucket_duration is not None: ignored.append('timebucket_duration')
276
- # if sequence_column is not None: ignored.append('sequence_column')
277
- # if seq_max is not None: ignored.append('seq_max')
278
- # if columns_list is not None and (
279
- # not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
280
- # if primary_time_index_name is not None: ignored.append('primary_time_index_name')
281
- # if len(ignored) > 0:
282
- # warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
283
- # ignored,
284
- # 'timecode_column',
285
- # 'missing'))
286
-
287
- # Check and calculate batch size for optimized performance for FastLoad
288
- if batch_size is None:
289
- batch_size = _get_batchsize(df)
290
- else:
291
- # Validate argument batch_size type
292
- _Validators._validate_function_arguments([["batch_size", batch_size,
293
- False, (int)]])
294
- if batch_size < 100000:
295
- warnings.warn("The batch_size provided is less than 100000. Teradata \
296
- recommends using 100000 as minimum batch size for \
297
- improved performance.")
298
-
299
- # If the table created must be a PTI table, then validate additional parameters
300
- # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
301
- # will be ignored - for example, primary_index
302
- # if is_pti:
303
- # _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
304
- # timezero_date, primary_time_index_name, columns_list,
305
- # sequence_column, seq_max, types, index, index_label)
306
-
307
- # Check if destination table exists
308
- table_exists = dt_obj._table_exists(con)
309
-
310
- # Raise an exception when the table not exists and if_exists='fail'
311
- dt_obj._check_table_exists(is_table_exists=table_exists)
312
-
313
- # Let's create the SQLAlchemy table object to recreate the table
314
- if not table_exists or if_exists.lower() == 'replace':
315
- dt_obj._create_or_replace_table(con, table_exists=table_exists)
316
-
317
- fl_dict = _insert_from_dataframe(dt_obj, table_name, batch_size)
318
-
319
- # Check column compatibility for insertion when table exists and if_exists = 'append'
320
- if table_exists and if_exists.lower() == 'append':
321
- # Create table object
322
- table = UtilFuncs._get_sqlalchemy_table(table_name,
323
- schema_name=schema_name)
324
-
325
- cols = _extract_column_info(df, index=index, index_label=index_label)
326
- if table is not None:
327
- dt_obj._check_columns_compatibility(table_obj=table, cols=cols)
328
-
329
- stag_table_name = ''
330
- try:
331
- # Create staging table and use FastLoad to load data.
332
- # Then copy all the rows from staging table to target table using insert_into sql.
333
- stag_table_name = UtilFuncs._generate_temp_table_name(prefix="fl_stag",
334
- gc_on_quit=False,
335
- quote=False,
336
- table_type=TeradataConstants.TERADATA_TABLE)
337
-
338
- # Get the table name without schema name for further steps
339
- stag_table_name = stag_table_name.split('.')[-1].replace('"', '')
340
- # Create staging table object
341
- dt_obj._create_table(con, table_name=stag_table_name)
342
-
343
- # Insert data to staging table using fastload
344
- fl_dict = _insert_from_dataframe(dt_obj, stag_table_name, batch_size)
345
-
346
- # Insert data from staging table to target data.
347
- df_utils._insert_all_from_table(table_name,
348
- dt_obj._get_fully_qualified_table_name(table_name=stag_table_name),
349
- cols[0], schema_name)
350
- except:
351
- raise
352
- finally:
353
- if stag_table_name:
354
- UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name))
355
-
356
- except (TeradataMlException, ValueError, TypeError):
357
- raise
358
- except Exception as err:
359
- raise TeradataMlException(Messages.get_message(MessageCodes.FASTLOAD_FAILS),
360
- MessageCodes.FASTLOAD_FAILS) from err
361
- return fl_dict
362
-
363
- def _insert_from_dataframe(dt_obj, table_name, batch_size):
364
- """
365
- This is an internal function used to sequentially extract column info from DataFrame,
366
- iterate rows, and insert rows manually. Used for Insertions to Tables with Pandas index.
367
- This uses DBAPI's escape functions for Fastload which is a batch insertion method.
368
-
369
- PARAMETERS:
370
- dt_obj:
371
- Object of _DataTransferUtils class.
372
- Types: object
373
-
374
- table_name:
375
- Name of the table.
376
- Types: String
377
-
378
- batch_size:
379
- Specifies the number of rows to be inserted in a batch.
380
- Types: Int
381
-
382
- RETURNS:
383
- dict
384
-
385
- RAISES:
386
- Exception
387
-
388
- EXAMPLES:
389
- _insert_from_dataframe(dt_obj, table_name, batch_size=100)
390
- """
391
- conn = get_connection().connection
392
- # Create a cursor from connection object
393
- cur = conn.cursor()
394
-
395
- error_tablename = ""
396
- warn_tablename = ""
397
-
398
- try:
399
- # if is_pti:
400
- # # This if for non-index columns.
401
- # col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)
402
-
403
- is_multi_index = isinstance(dt_obj.df.index, pd.MultiIndex)
404
-
405
- # The Fastload functionality is provided through several escape methods using
406
- # teradatasql; like: {fn teradata_try_fastload}, {fn teradata_get_errors}, etc.
407
- # - {fn teradata_nativesql}: This escape method is to specify to use native
408
- # SQL escape calls.
409
- # - {fn teradata_autocommit_off}: This escape method is to turn off auto-commit.
410
- # For FastLoad it is required that it should not execute any transaction
411
- # management SQL commands when auto-commit is on.
412
- # - {fn teradata_try_fastload}: This escape method tries to use FastLoad
413
- # for the INSERT statement, and automatically executes the INSERT as a regular
414
- # SQL statement when the INSERT is not compatible with FastLoad.
415
- # - {fn teradata_require_fastload}: This escape method requires FastLoad
416
- # for the INSERT statement, and fails with an error when the INSERT is not
417
- # compatible with FastLoad.
418
- # - {fn teradata_get_errors}: This escape method returns in one string all
419
- # data errors observed by FastLoad for the most recent batch. The data errors
420
- # are obtained from FastLoad error table 1, for problems such as constraint
421
- # violations, data type conversion errors, and unavailable AMP conditions.
422
- # - {fn teradata_get_warnings}: This escape method returns in one string all
423
- # warnings generated by FastLoad for the request. The warnings are obtained
424
- # from FastLoad error table 2, for problems such as duplicate rows.
425
- # - {fn teradata_logon_sequence_number}: This escape method returns the string
426
- # form of an integer representing the Logon Sequence Number(LSN) for the
427
- # FastLoad. Returns an empty string if the request is not a FastLoad.
428
-
429
- # Quoted, schema-qualified table name.
430
- table = dt_obj._get_fully_qualified_table_name(table_name)
431
-
432
- # Form the INSERT query for fastload.
433
- ins = dt_obj._form_insert_query(table)
434
-
435
- # Turn off autocommit before the Fastload insertion
436
- dt_obj._process_escape_functions(cur, escape_function= \
437
- DriverEscapeFunctions.AUTOCOMMIT_OFF)
438
-
439
- # Initialize dict template for saving error/warning information
440
- err_dict = {key:[] for key in ['batch_no', 'error_message']}
441
- warn_dict = {key:[] for key in ['batch_no', 'error_message']}
442
-
443
- batch_number = 1
444
- num_batches = int(dt_obj.df.shape[0]/batch_size)
445
-
446
- for i in range(0, dt_obj.df.shape[0], batch_size):
447
- # Add the remaining rows to last batch after second last batch
448
- if (batch_number == num_batches) :
449
- last_elem = dt_obj.df.shape[0]
450
- else :
451
- last_elem = i + batch_size
452
-
453
- pdf = dt_obj.df.iloc[i:last_elem]
454
- insert_list = []
455
- # Iterate rows of DataFrame per batch size to convert it to list of lists.
456
- for row_index, row in enumerate(pdf.itertuples(index=True)):
457
- insert_list2 = []
458
- for col_index, x in enumerate(pdf.columns):
459
- insert_list2.append(row[col_index+1])
460
- if dt_obj.index is True:
461
- insert_list2.extend(row[0]) if is_multi_index else insert_list2.append(row[0])
462
- insert_list.append(insert_list2)
463
- # Execute insert statement
464
- cur.execute (ins, insert_list)
465
-
466
- # Get error and warning information
467
- err, _ = dt_obj._process_fastexport_errors_warnings(ins)
468
- if len(err) != 0:
469
- err_dict['batch_no'].extend([batch_number] * len(err))
470
- err_dict['error_message'].extend(err)
471
-
472
- print("Processed {} rows in batch {}.".format(pdf.shape[0], batch_number))
473
-
474
- # If shape of DataFrame equal to last_elem of last batch.
475
- if last_elem == dt_obj.df.shape[0]:
476
- break
477
-
478
- batch_number += 1
479
-
480
- # Get logon sequence number to be used for error/warning table names
481
- logon_seq_number = dt_obj._process_escape_functions(cur, escape_function= \
482
- DriverEscapeFunctions.LOGON_SEQ_NUM,
483
- insert_query=ins)
484
-
485
- # Commit the rows
486
- conn.commit()
487
-
488
- # Get error and warning information, if any.
489
- # Errors/Warnings like duplicate rows are added here.
490
- _, warn = dt_obj._process_fastexport_errors_warnings(ins)
491
- if len(warn) != 0:
492
- warn_dict['batch_no'].extend(['batch_summary'] * len(warn))
493
- warn_dict['error_message'].extend(warn)
494
-
495
- # Get error and warning information for error and warning tables, persist
496
- # error and warning tables to Vantage if user has specified save_error as True
497
- # else show it as pandas dataframe on console.
498
- pd_err_df = dt_obj._get_pandas_df_from_errors_warnings(err_dict)
499
- if not pd_err_df.empty:
500
- msg_type = "err"
501
- error_tablename = dt_obj._create_error_warnings_table(pd_err_df, msg_type, logon_seq_number[0][0])
502
-
503
- pd_warn_df = dt_obj._get_pandas_df_from_errors_warnings(warn_dict)
504
- if not pd_warn_df.empty:
505
- msg_type = "warn"
506
- warn_tablename = dt_obj._create_error_warnings_table(pd_warn_df, msg_type, logon_seq_number[0][0])
507
-
508
- except Exception:
509
- conn.rollback()
510
- raise
511
- finally:
512
- # Turn on autocommit.
513
- dt_obj._process_escape_functions(cur, escape_function=DriverEscapeFunctions.AUTOCOMMIT_ON)
514
- cur.close()
515
-
516
- return {"errors_dataframe": pd_err_df, "warnings_dataframe": pd_warn_df,
517
- "errors_table": error_tablename, "warnings_table": warn_tablename}
518
-
519
- def _get_batchsize(df):
520
- """
521
- This internal function calculates batch size which should be more than 100000
522
- for better fastload performance.
523
-
524
- PARAMETERS:
525
- df:
526
- The Pandas DataFrame object for which the batch size has to be calculated.
527
- Types: pandas.DataFrame
528
-
529
- RETURNS:
530
- Batch size i.e. number of rows to be inserted in a batch.
531
-
532
- RAISES:
533
- N/A
534
-
535
- EXAMPLES:
536
- _get_batchsize(df)
537
- """
538
- return df.shape[0] if df.shape[0] <= 100000 else round(df.shape[0]/int(df.shape[0]/100000))
539
-
540
- def _create_table_for_fastload(df, con, table_name, schema_name=None, primary_index=None,
541
- is_pti=False, primary_time_index_name=None, timecode_column=None,
542
- timezero_date=None, timebucket_duration=None, sequence_column=None,
543
- seq_max=None, columns_list=[], types=None, index=False,
544
- index_label=None):
545
- """
546
- PARAMETERS:
547
- df:
548
- Specifies the Pandas DataFrame object to be saved.
549
- Types: pandas.DataFrame
550
-
551
- con:
552
- A SQLAlchemy connectable (engine/connection) object
553
- Types: Teradata connection object
554
-
555
- table_name:
556
- Specifies the name of the table to be created in Vantage.
557
- Types: String
558
-
559
- schema_name:
560
- Specifies the name of the database schema in Teradata Vantage to write to.
561
- Types: String
562
-
563
- index:
564
- Specifies whether to save Pandas DataFrame index as a column or not.
565
- Types: Boolean (True or False)
566
-
567
- index_label:
568
- Specifies the column label(s) for Pandas DataFrame index column(s).
569
- Types: String or list of strings
570
-
571
- primary_index:
572
- Specifies which column(s) to use as primary index while creating Teradata
573
- table in Vantage. When None, No Primary Index Teradata tables are created.
574
- Types: String or list of strings
575
-
576
- types:
577
- Specifies required data-types for requested columns to be saved in Vantage.
578
- Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
579
-
580
- RETURNS:
581
- Table object
582
-
583
- RAISES:
584
- TeradataMlException, sqlalchemy.OperationalError
585
-
586
- EXAMPLES:
587
- _create_table_for_fastload(df, con, table_name, schema_name, primary_index,
588
- is_pti, primary_time_index_name, timecode_column,
589
- timezero_date, timebucket_duration, sequence_column,
590
- seq_max, columns_list, types, index, index_label)
591
- """
592
- if is_pti:
593
- table = _create_pti_table_object(df=df, con=con, table_name=table_name,
594
- schema_name=schema_name, temporary=False,
595
- primary_time_index_name=primary_time_index_name,
596
- timecode_column=timecode_column, timezero_date=timezero_date,
597
- timebucket_duration=timebucket_duration,
598
- sequence_column=sequence_column, seq_max=seq_max,
599
- columns_list=columns_list, set_table=False,
600
- types=types, index=index, index_label=index_label)
601
-
602
- UtilFuncs._create_table_using_table_object(table)
603
-
1
+ #!/usr/bin/python
2
+ # ##################################################################
3
+ #
4
+ # Copyright 2019 Teradata. All rights reserved.
5
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
6
+ #
7
+ # Primary Owner: Abhinav Sahu (abhinav.sahu@teradata.com)
8
+ # Secondary Owner:
9
+ #
10
+ # ##################################################################
11
+
12
+ import re
13
+ import datetime
14
+ import warnings
15
+ import pandas as pd
16
+
17
+ from sqlalchemy import MetaData, Table, Column
18
+ from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
19
+
20
+ from teradataml.context.context import _get_current_databasename
21
+ from teradataml.dataframe import dataframe
22
+ from teradataml.context.context import *
23
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
24
+ from teradataml.common.constants import TeradataConstants, DriverEscapeFunctions
25
+ from teradataml.common.utils import UtilFuncs
26
+ from teradataml.common.garbagecollector import GarbageCollector
27
+ from teradataml.utils.validators import _Validators
28
+ from teradataml.dataframe.copy_to import copy_to_sql, \
29
+ _validate_pti_copy_parameters, _create_table_object, \
30
+ _create_pti_table_object, _extract_column_info, \
31
+ _check_columns_insertion_compatible
32
+ from teradataml.dataframe.data_transfer import _DataTransferUtils
33
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband
34
+
35
+
36
+ @collect_queryband(queryband="fstLd")
37
+ def fastload(df, table_name, schema_name=None, if_exists='replace', index=False,
38
+ index_label=None, primary_index=None, types=None, batch_size=None,
39
+ save_errors=False, open_sessions=None, err_tbl_1_suffix=None,
40
+ err_tbl_2_suffix=None, err_tbl_name=None, warn_tbl_name=None,
41
+ err_staging_db=None):
42
+ """
43
+ The fastload() API writes records from a Pandas DataFrame to Teradata Vantage
44
+ using Fastload. FastLoad API can be used to quickly load large amounts of data
45
+ in an empty table on Vantage.
46
+ 1. Teradata recommends using this API when the number of rows in the Pandas DataFrame
47
+ is greater than 100,000 to have better performance. To insert lesser rows,
48
+ please use copy_to_sql for optimized performance. The data is loaded in batches.
49
+ 2. FastLoad API cannot load duplicate rows in the DataFrame if the table is a
50
+ MULTISET table having primary index.
51
+ 3. FastLoad API does not support all Teradata Advanced SQL Engine data types.
52
+ For example, target table having BLOB and CLOB data type columns cannot be
53
+ loaded.
54
+ 4. If there are any incorrect rows i.e. due to constraint violations, data type
55
+ conversion errors, etc., FastLoad protocol ignores those rows and inserts
56
+ all valid rows.
57
+ 5. Rows in the DataFrame that failed to get inserted are categorized into errors
58
+ and warnings by FastLoad protocol and these errors and warnings are stored
59
+ into respective error and warning tables by FastLoad API.
60
+ 6. fastload() creates 2 error tables when data is erroneous. These error tables are
61
+ referred to as ERR_1 and ERR_2 tables.
62
+ * ERR_1 table is used to capture rows that violate the constraints or have format
63
+ errors. It typically contains information about rows that could not be inserted
64
+ into the target table due to data conversion errors, constraint violations, etc.
65
+ * ERR_2 table is used to log any duplicate rows found during the load process and
66
+ which are not loaded in target table, since fastLoad does not allow duplicate
67
+ rows to be loaded into the target table.
68
+ 7. When "save_errors" argument is set to True, ERR_1 and ERR_2 tables are persisted.
69
+ The fully qualified names of ERR_1, ERR_2 and warning tables are shown once the
70
+ fastload operation is complete.
71
+ 8. If user wants both error and warnings information from pandas dataframe to be
72
+ persisted rather than that from ERR_1 and ERR_2 tables, then "save_errors" should
73
+ be set to True and "err_tbl_name" must be provided.
74
+
75
+ For additional information about FastLoad protocol through teradatasql driver,
76
+ please refer to the FASTLOAD section of https://pypi.org/project/teradatasql/#FastLoad
77
+ driver documentation for more information.
78
+
79
+ PARAMETERS:
80
+ df:
81
+ Required Argument.
82
+ Specifies the Pandas DataFrame object to be saved in Vantage.
83
+ Types: pandas.DataFrame
84
+
85
+ table_name:
86
+ Required Argument.
87
+ Specifies the name of the table to be created in Vantage.
88
+ Types: String
89
+
90
+ schema_name:
91
+ Optional Argument.
92
+ Specifies the name of the database schema in Vantage to write to.
93
+ Types: String
94
+ Default: None (Uses default database schema).
95
+
96
+ if_exists:
97
+ Optional Argument.
98
+ Specifies the action to take when table already exists in Vantage.
99
+ Types: String
100
+ Possible values: {'fail', 'replace', 'append'}
101
+ - fail: If table exists, raise TeradataMlException.
102
+ - replace: If table exists, drop it, recreate it, and insert data.
103
+ - append: If table exists, insert data. Create if does not exist.
104
+ Default: replace
105
+
106
+ index:
107
+ Optional Argument.
108
+ Specifies whether to save Pandas DataFrame index as a column or not.
109
+ Types: Boolean (True or False)
110
+ Default: False
111
+
112
+ index_label:
113
+ Optional Argument.
114
+ Specifies the column label(s) for Pandas DataFrame index column(s).
115
+ Types: String or list of strings
116
+ Default: None
117
+
118
+ primary_index:
119
+ Optional Argument.
120
+ Specifies which column(s) to use as primary index while creating table
121
+ in Vantage. When set to None, No Primary Index (NoPI) tables are created.
122
+ Types: String or list of strings
123
+ Default: None
124
+ Example:
125
+ primary_index = 'my_primary_index'
126
+ primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
127
+
128
+ types:
129
+ Optional Argument.
130
+ Specifies the data types for requested columns to be saved in Vantage.
131
+ Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
132
+ Default: None
133
+
134
+ Note:
135
+ 1. This argument accepts a dictionary of columns names and their required
136
+ teradatasqlalchemy types as key-value pairs, allowing to specify a subset
137
+ of the columns of a specific type.
138
+ i) When only a subset of all columns are provided, the column types
139
+ for the rest are assigned appropriately.
140
+ ii) When types argument is not provided, the column types are assigned
141
+ as listed in the following table:
142
+ +---------------------------+-----------------------------------------+
143
+ | Pandas/Numpy Type | teradatasqlalchemy Type |
144
+ +---------------------------+-----------------------------------------+
145
+ | int32 | INTEGER |
146
+ +---------------------------+-----------------------------------------+
147
+ | int64 | BIGINT |
148
+ +---------------------------+-----------------------------------------+
149
+ | bool | BYTEINT |
150
+ +---------------------------+-----------------------------------------+
151
+ | float32/float64 | FLOAT |
152
+ +---------------------------+-----------------------------------------+
153
+ | datetime64/datetime64[ns] | TIMESTAMP |
154
+ +---------------------------+-----------------------------------------+
155
+ | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True) |
156
+ +---------------------------+-----------------------------------------+
157
+ | Any other data type | VARCHAR(configure.default_varchar_size) |
158
+ +---------------------------+-----------------------------------------+
159
+ 2. This argument does not have any effect when the table specified using
160
+ table_name and schema_name exists and if_exists = 'append'.
161
+
162
+ batch_size:
163
+ Optional Argument.
164
+ Specifies the number of rows to be loaded in a batch. For better performance,
165
+ recommended batch size is at least 100,000. batch_size must be a positive integer.
166
+ If this argument is None, there are two cases based on the number of
167
+ rows, say N in the dataframe 'df' as explained below:
168
+ If N is greater than 100,000, the rows are divided into batches of
169
+ equal size with each batch having at least 100,000 rows (except the
170
+ last batch which might have more rows). If N is less than 100,000, the
171
+ rows are inserted in one batch after notifying the user that insertion
172
+ happens with degradation of performance.
173
+ If this argument is not None, the rows are inserted in batches of size
174
+ given in the argument, irrespective of the recommended batch size.
175
+ The last batch will have rows less than the batch size specified, if the
176
+ number of rows is not an integral multiple of the argument batch_size.
177
+ Default Value: None
178
+ Types: int
179
+
180
+ save_errors:
181
+ Optional Argument.
182
+ Specifies whether to persist the error/warning information in Vantage
183
+ or not.
184
+ Notes:
185
+ * When "save_errors" is set to True, ERR_1 and ERR_2 tables are persisted.
186
+ The fully qualified names of ERR_1, ERR_2 and warning table are returned
187
+ in a dictionary containing keys named as "ERR_1_table", "ERR_2_table",
188
+ "warnings_table" respectively.
189
+ * When "save_errors" is set to True and "err_tbl_name" is also provided,
190
+ "err_tbl_name" takes precedence and error information is persisted into
191
+ a single table using pandas dataframe rather than in ERR_1 and ERR_2 tables.
192
+ * When "save_errors" is set to False, errors and warnings information is
193
+ not persisted as tables, but it is returned as pandas dataframes in a
194
+ dictionary containing keys named as "errors_dataframe" and "warnings_dataframe"
195
+ respectively.
196
+ Default Value: False
197
+ Types: bool
198
+
199
+ open_sessions:
200
+ Optional Argument.
201
+ Specifies the number of Teradata data transfer sessions to be opened for fastload operation.
202
+ Note : If "open_sessions" argument is not provided, the default value is the smaller of 8 or the
203
+ number of AMPs available.
204
+ For additional information about number of Teradata data-transfer
205
+ sessions opened during fastload, please refer to:
206
+ https://pypi.org/project/teradatasql/#FastLoad
207
+ Default Value: None
208
+ Types: int
209
+
210
+ err_tbl_1_suffix:
211
+ Optional Argument.
212
+ Specifies the suffix for error table 1 created by fastload job.
213
+ Default Value: "_ERR_1"
214
+ Types: String
215
+
216
+ err_tbl_2_suffix:
217
+ Optional Argument.
218
+ Specifies the suffix for error table 2 created by fastload job.
219
+ Default Value: "_ERR_2"
220
+ Types: String
221
+
222
+ err_tbl_name:
223
+ Optional Argument.
224
+ Specifies the name for error table. This argument takes precedence
225
+ over "save_errors" and saves error information in single table,
226
+ rather than ERR_1 and ERR_2 error tables.
227
+ Default value: "td_fl_<table_name>_err_<unique_id>" where table_name
228
+ is name of target/staging table and unique_id is logon
229
+ sequence number of fastload job.
230
+ Types: String
231
+
232
+ warn_tbl_name:
233
+ Optional Argument.
234
+ Specifies the name for warning table.
235
+ Default value: "td_fl_<table_name>_warn_<unique_id>" where table_name
236
+ is name of target/staging table and unique_id is logon
237
+ sequence number of fastload job.
238
+ Types: String
239
+
240
+ err_staging_db:
241
+ Optional Argument.
242
+ Specifies the name of the database to be used for creating staging
243
+ table and error/warning tables.
244
+ Note:
245
+ Current session user must have CREATE, DROP and INSERT table
246
+ permissions on err_staging_db database.
247
+ Types: String
248
+
249
+ RETURNS:
250
+ A dict containing the following attributes:
251
+ 1. errors_dataframe: It is a Pandas DataFrame containing error messages
252
+ thrown by fastload. DataFrame is empty if there are no errors or
253
+ "save_errors" is set to True.
254
+ 2. warnings_dataframe: It is a Pandas DataFrame containing warning messages
255
+ thrown by fastload. DataFrame is empty if there are no warnings.
256
+ 3. errors_table: Fully qualified name of the table containing errors. It is
257
+ an empty string (''), if argument "save_errors" is set to False.
258
+ 4. warnings_table: Fully qualified name of the table containing warnings. It is
259
+ an empty string (''), if argument "save_errors" is set to False.
260
+ 5. ERR_1_table: Fully qualified name of the ERR 1 table created by fastload
261
+ job. It is an empty string (''), if argument "save_errors" is set to False.
262
+ 6. ERR_2_table: Fully qualified name of the ERR 2 table created by fastload
263
+ job. It is an empty string (''), if argument "save_errors" is set to False.
264
+
265
+ RAISES:
266
+ TeradataMlException
267
+
268
+ EXAMPLES:
269
+ Saving a Pandas DataFrame using Fastload:
270
+ >>> from teradataml.dataframe.fastload import fastload
271
+ >>> from teradatasqlalchemy.types import *
272
+
273
+ >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
274
+ 'emp_sage': [100, 200, 300, 400],
275
+ 'emp_id': [133, 144, 155, 177],
276
+ 'marks': [99.99, 97.32, 94.67, 91.00]
277
+ }
278
+
279
+ >>> pandas_df = pd.DataFrame(df)
280
+
281
+ # Example 1: Default execution.
282
+ >>> fastload(df = pandas_df, table_name = 'my_table')
283
+
284
+ # Example 2: Save a Pandas DataFrame with primary_index.
285
+ >>> pandas_df = pandas_df.set_index(['emp_id'])
286
+ >>> fastload(df = pandas_df, table_name = 'my_table_1', primary_index='emp_id')
287
+
288
+ # Example 3: Save a Pandas DataFrame using fastload() with index and primary_index.
289
+ >>> fastload(df = pandas_df, table_name = 'my_table_2', index=True,
290
+ primary_index='index_label')
291
+
292
+ # Example 4: Save a Pandas DataFrame using types, appending to the table if it already exists.
293
+ >>> fastload(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
294
+ index = True, index_label = 'my_index_label',
295
+ primary_index = ['emp_id'], if_exists = 'append',
296
+ types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
297
+ 'emp_id': BIGINT, 'marks': DECIMAL})
298
+
299
+ # Example 5: Save a Pandas DataFrame using levels in index of type MultiIndex.
300
+ >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
301
+ >>> fastload(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
302
+ index = True, index_label = ['index1', 'index2'],
303
+ primary_index = ['index1'], if_exists = 'replace')
304
+
305
+ # Example 6: Save a Pandas DataFrame by opening specified number of teradata data transfer sessions.
306
+ >>> fastload(df = pandas_df, table_name = 'my_table_5', open_sessions = 2)
307
+
308
+ # Example 7: Save a Pandas Dataframe to a table in specified target database "schema_name".
309
+ # Save errors and warnings to database specified with "err_staging_db".
310
+ # Save errors to table named as "err_tbl_name" and warnings to "warn_tbl_name".
311
+ # Given that, user is connected to a database different from "schema_name"
312
+ # and "err_staging_db".
313
+
314
+ # Create a pandas dataframe having one duplicate and one faulty row.
315
+ >>> data_dict = {"C_ID": [301, 301, 302, 303, 304, 305, 306, 307, 308],
316
+ "C_timestamp": ['2014-01-06 09:01:25', '2014-01-06 09:01:25',
317
+ '2015-01-06 09:01:25.25.122200', '2017-01-06 09:01:25.11111',
318
+ '2013-01-06 09:01:25', '2019-03-06 10:15:28',
319
+ '2014-01-06 09:01:25.1098', '2014-03-06 10:01:02',
320
+ '2014-03-06 10:01:20.0000']}
321
+ >>> my_df = pd.DataFrame(data_dict)
322
+
323
+ # Fastload data in non-default schema "target_db" and save errors and warnings in given tables.
324
+ >>> fastload(df=my_df, table_name='fastload_with_err_warn_tbl_stag_db',
325
+ if_exists='replace', primary_index='C_ID', schema_name='target_db',
326
+ types={'C_ID': INTEGER, 'C_timestamp': TIMESTAMP(6)},
327
+ err_tbl_name='fld_errors', warn_tbl_name='fld_warnings',
328
+ err_staging_db='stage_db')
329
+ Processed 9 rows in batch 1.
330
+ {'errors_dataframe': batch_no error_message
331
+ 0 1 [Session 14527] [Teradata Database] [Error 26...,
332
+ 'warnings_dataframe': batch_no error_message
333
+ 0 batch_summary [Session 14526] [Teradata SQL Driver] [Warnin...,
334
+ 'errors_table': 'stage_db.fld_errors',
335
+ 'warnings_table': 'stage_db.fld_warnings',
336
+ 'ERR_1_table': '',
337
+ 'ERR_2_table': ''}
338
+
339
+ # Validate loaded data table.
340
+ >>> DataFrame(in_schema("target_db", "fastload_with_err_warn_tbl_stag_db"))
341
+ C_ID C_timestamp
342
+ 303 2017-01-06 09:01:25.111110
343
+ 306 2014-01-06 09:01:25.109800
344
+ 304 2013-01-06 09:01:25.000000
345
+ 307 2014-03-06 10:01:02.000000
346
+ 305 2019-03-06 10:15:28.000000
347
+ 301 2014-01-06 09:01:25.000000
348
+ 308 2014-03-06 10:01:20.000000
349
+
350
+ # Validate error and warning tables.
351
+ >>> DataFrame("fld_errors")
352
+ batch_no error_message
353
+ 1 [Session 14527] [Teradata Database] [Error 2673] FastLoad failed to insert 1 of 9 batched rows. Batched row 3 failed to insert because of Teradata Database error 2673 in "target_db"."fastload_with_err_warn_tbl_stag_db"."C_timestamp"
354
+
355
+ >>> DataFrame("fld_warnings")
356
+ batch_no error_message
357
+ batch_summary [Session 14526] [Teradata SQL Driver] [Warning 518] Found 1 duplicate or faulty row(s) while ending FastLoad of database table "target_db"."fastload_with_err_warn_tbl_stag_db": expected a row count of 8, got a row count of 7
358
+
359
+ # Example 8: Save a Pandas Dataframe to a table in specified target database "schema_name".
360
+ # Save errors in ERR_1 and ERR_2 tables having user defined suffixes provided
361
+ # in "err_tbl_1_suffix" and "err_tbl_2_suffix".
362
+ # Source Pandas dataframe is same as Example 7.
363
+
364
+ >>> fastload(df=my_df, table_name = 'fastload_with_err_warn_tbl_stag_db',
365
+ schema_name = 'target_db', if_exists = 'append',
366
+ types={'C_ID': INTEGER, 'C_timestamp': TIMESTAMP(6)},
367
+ err_staging_db='stage_db', save_errors=True,
368
+ err_tbl_1_suffix="_user_err_1", err_tbl_2_suffix="_user_err_2")
369
+ {'errors_dataframe': Empty DataFrame
370
+ Columns: []
371
+ Index: [],
372
+ 'warnings_dataframe': batch_no error_message
373
+ 0 batch_summary [Session 14699] [Teradata SQL Driver] [Warnin...,
374
+ 'errors_table': '',
375
+ 'warnings_table': 'stage_db.td_fl_fastload_with_err_warn_tbl_stag_db_warn_1730',
376
+ 'ERR_1_table': 'stage_db.ml__fl_stag_1716272404181579_user_err_1',
377
+ 'ERR_2_table': 'stage_db.ml__fl_stag_1716272404181579_user_err_2'}
378
+
379
+ # Validate ERR_1 and ERR_2 tables.
380
+ >>> DataFrame(in_schema("stage_db", "ml__fl_stag_1716270574550744_user_err_1"))
381
+ ErrorCode ErrorFieldName DataParcel
382
+ 2673 F_C_timestamp b'12E...'
383
+
384
+ >>> DataFrame(in_schema("stage_db", "ml__fl_stag_1716270574550744_user_err_2"))
385
+ C_ID C_timestamp
386
+
387
+ """
388
+ # Deriving global connection using get_connection()
389
+ con = get_connection()
390
+ try:
391
+ if con is None:
392
+ raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE),
393
+ MessageCodes.CONNECTION_FAILURE)
394
+
395
+ if isinstance(df, dataframe.DataFrame):
396
+ raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
397
+ 'df', "Pandas DataFrame"), MessageCodes.UNSUPPORTED_DATATYPE)
398
+
399
+ dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name, if_exists=if_exists,
400
+ index=index, index_label=index_label, primary_index=primary_index,
401
+ types=types, batch_size=batch_size,
402
+ save_errors=save_errors, api_name='fastload',
403
+ use_fastload=True, open_sessions=open_sessions,
404
+ err_tbl_1_suffix=err_tbl_1_suffix, err_tbl_2_suffix=err_tbl_2_suffix,
405
+ err_tbl_name=err_tbl_name, warn_tbl_name=warn_tbl_name,
406
+ err_staging_db=err_staging_db)
407
+ # Validate DataFrame & related flags; Proceed only when True
408
+ dt_obj._validate()
409
+
410
+ # We have commented out the PTI related code for now as fastload fails to
411
+ # load data into PTI tables. Same has been reported to gosql team. We'll
412
+ # un-comment this once the issue is fixed.
413
+ # Check if the table to be created must be a Primary Time Index (PTI) table.
414
+ # If a user specifies the timecode_column parameter, and attempt to create
415
+ # a PTI will be made.
416
+ # is_pti = False
417
+ # if timecode_column is not None:
418
+ # is_pti = True
419
+ # if primary_index is not None:
420
+ # warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
421
+ # 'primary_index',
422
+ # 'timecode_column',
423
+ # 'specified'))
424
+ # else:
425
+ # ignored = []
426
+ # if timezero_date is not None: ignored.append('timezero_date')
427
+ # if timebucket_duration is not None: ignored.append('timebucket_duration')
428
+ # if sequence_column is not None: ignored.append('sequence_column')
429
+ # if seq_max is not None: ignored.append('seq_max')
430
+ # if columns_list is not None and (
431
+ # not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
432
+ # if primary_time_index_name is not None: ignored.append('primary_time_index_name')
433
+ # if len(ignored) > 0:
434
+ # warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
435
+ # ignored,
436
+ # 'timecode_column',
437
+ # 'missing'))
438
+
439
+ # Check and calculate batch size for optimized performance for FastLoad
440
+ if batch_size is None:
441
+ batch_size = _get_batchsize(df)
442
+ else:
443
+ # Validate argument batch_size type
444
+ _Validators._validate_function_arguments([["batch_size", batch_size,
445
+ False, (int)]])
446
+ if batch_size < 100000:
447
+ warnings.warn("The batch_size provided is less than 100000. "
448
+ "Teradata recommends using 100000 as minimum batch "
449
+ "size for improved performance.", stacklevel=2)
450
+
451
+ # If the table created must be a PTI table, then validate additional parameters
452
+ # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
453
+ # will be ignored - for example, primary_index
454
+ # if is_pti:
455
+ # _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
456
+ # timezero_date, primary_time_index_name, columns_list,
457
+ # sequence_column, seq_max, types, index, index_label)
458
+
459
+ # Check if destination table exists
460
+ table_exists = dt_obj._table_exists(con)
461
+
462
+ # Raise an exception when the table not exists and if_exists='fail'
463
+ dt_obj._check_table_exists(is_table_exists=table_exists)
464
+
465
+ # Let's create the SQLAlchemy table object to recreate the table
466
+ if not table_exists or if_exists.lower() == 'replace':
467
+ dt_obj._create_or_replace_table(con, table_exists=table_exists)
468
+
469
+ # Insert data to target table using fastload.
470
+ fl_dict = _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size)
471
+
472
+ # Check column compatibility for insertion when table exists and if_exists = 'append'
473
+ if table_exists and if_exists.lower() == 'append':
474
+ # Create table object
475
+ table = UtilFuncs._get_sqlalchemy_table(table_name,
476
+ schema_name=schema_name)
477
+
478
+ cols = _extract_column_info(df, index=index, index_label=index_label)
479
+ if table is not None:
480
+ dt_obj._check_columns_compatibility(table_obj=table, cols=cols)
481
+
482
+ stag_table_name = ''
483
+ try:
484
+ # Create staging table and use FastLoad to load data.
485
+ # Then copy all the rows from staging table to target table using insert_into sql.
486
+ # If err_staging_db is not provided, create staging table
487
+ # object in default connected DB.
488
+ if err_staging_db is None:
489
+ err_staging_db = _get_current_databasename()
490
+ stag_table_name = UtilFuncs._generate_temp_table_name(databasename=err_staging_db,
491
+ prefix="fl_stag",
492
+ gc_on_quit=False,
493
+ quote=False,
494
+ table_type=TeradataConstants.TERADATA_TABLE)
495
+
496
+ # Get the table name without schema name for further steps.
497
+ stag_table_name = UtilFuncs._extract_table_name(stag_table_name)
498
+ # Create staging table object.
499
+ dt_obj._create_table(con, table_name=stag_table_name,
500
+ schema_name=err_staging_db)
501
+
502
+ # Insert data to staging table using fastload.
503
+ fl_dict = _insert_from_pd_dataframe_with_fastload(dt_obj, stag_table_name, batch_size, err_staging_db)
504
+
505
+ # Insert data from staging table to target table.
506
+ df_utils._insert_all_from_table(table_name,
507
+ stag_table_name,
508
+ cols[0],
509
+ schema_name,
510
+ err_staging_db)
511
+ except:
512
+ raise
513
+ finally:
514
+ # Drop the staging table.
515
+ if stag_table_name:
516
+ UtilFuncs._drop_table(dt_obj._get_fully_qualified_table_name(stag_table_name, err_staging_db))
517
+
518
+ except (TeradataMlException, ValueError, TypeError):
519
+ raise
520
+ except Exception as err:
521
+ raise TeradataMlException(Messages.get_message(MessageCodes.FASTLOAD_FAILS),
522
+ MessageCodes.FASTLOAD_FAILS) from err
523
+ return fl_dict
524
+
525
+
526
def _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size, to_schema_name=None):
    """
    This is an internal function that inserts the rows of the pandas DataFrame
    held by "dt_obj" into a table, batch by batch, using the driver's FastLoad
    escape functions. Auto-commit is turned off for the duration of the load,
    all batches are committed together once every batch has been executed, and
    auto-commit is turned back on before returning. When the DataFrame index is
    to be saved (dt_obj.index is True), the index value(s) are appended after
    the column values of every row.

    PARAMETERS:
        dt_obj:
            Required Argument.
            Object of _DataTransferUtils class. Supplies the DataFrame (df),
            the index flag and the error/warning table settings.
            Types: object

        table_name:
            Required Argument.
            Name of the table to insert into.
            Types: String

        batch_size:
            Required Argument.
            Specifies the number of rows to be inserted in a batch.
            Rows left over after the last full batch are added to that
            last full batch rather than loaded as a separate batch.
            Types: Int

        to_schema_name:
            Optional Argument.
            Specifies name of the database schema where target table needs to be created.

    RETURNS:
        dict with the following keys:
            "errors_dataframe"   - pandas DataFrame built from the errors collected per batch.
            "warnings_dataframe" - pandas DataFrame built from the warnings collected after commit.
            "errors_table"       - name of the persisted errors table, "" when not created.
            "warnings_table"     - name of the persisted warnings table, "" when not created.
            "ERR_1_table"        - generated ERR_1 table name, "" when not applicable.
            "ERR_2_table"        - generated ERR_2 table name, "" when not applicable.

    RAISES:
        Exception
        Any exception raised during the load is re-raised after the
        transaction is rolled back.

    EXAMPLES:
        _insert_from_pd_dataframe_with_fastload(dt_obj, table_name, batch_size=100)
    """
    conn = get_connection().connection
    # Create a cursor from connection object
    cur = conn.cursor()

    error_tablename = ""
    warn_tablename = ""

    try:
        # if is_pti:
        #     # This if for non-index columns.
        #     col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)

        is_multi_index = isinstance(dt_obj.df.index, pd.MultiIndex)

        # The Fastload functionality is provided through several escape methods using
        # teradatasql; like: {fn teradata_try_fastload}, {fn teradata_get_errors}, etc.
        # - {fn teradata_nativesql}: This escape method is to specify to use native
        #   SQL escape calls.
        # - {fn teradata_autocommit_off}: This escape method is to turn off auto-commit.
        #   For FastLoad it is required that it should not execute any transaction
        #   management SQL commands when auto-commit is on.
        # - {fn teradata_try_fastload}: This escape method tries to use FastLoad
        #   for the INSERT statement, and automatically executes the INSERT as a regular
        #   SQL statement when the INSERT is not compatible with FastLoad.
        # - {fn teradata_require_fastload}: This escape method requires FastLoad
        #   for the INSERT statement, and fails with an error when the INSERT is not
        #   compatible with FastLoad.
        # - {fn teradata_get_errors}: This escape method returns in one string all
        #   data errors observed by FastLoad for the most recent batch. The data errors
        #   are obtained from FastLoad error table 1, for problems such as constraint
        #   violations, data type conversion errors, and unavailable AMP conditions.
        # - {fn teradata_get_warnings}: This escape method returns in one string all
        #   warnings generated by FastLoad for the request. The warnings are obtained
        #   from FastLoad error table 2, for problems such as duplicate rows.
        # - {fn teradata_logon_sequence_number}: This escape method returns the string
        #   form of an integer representing the Logon Sequence Number(LSN) for the
        #   FastLoad. Returns an empty string if the request is not a FastLoad.

        # Quoted, schema-qualified table name.
        table = dt_obj._get_fully_qualified_table_name(table_name, to_schema_name)

        # Form the INSERT query for fastload.
        ins = dt_obj._form_insert_query(table)

        # Turn off autocommit before the Fastload insertion
        dt_obj._process_escape_functions(cur, escape_function=DriverEscapeFunctions.AUTOCOMMIT_OFF)

        # Initialize dict template for saving error/warning information
        err_dict = {key: [] for key in ['batch_no', 'error_message']}
        warn_dict = {key: [] for key in ['batch_no', 'error_message']}

        total_rows = dt_obj.df.shape[0]
        # The index value(s) are appended to each row only when index is exactly True.
        include_index = dt_obj.index is True

        batch_number = 1
        # Number of full batches; the last of them also absorbs the leftover rows.
        num_batches = total_rows // batch_size

        # Empty queryband buffer before SQL call.
        UtilFuncs._set_queryband()

        for start in range(0, total_rows, batch_size):
            # Add the remaining rows to last batch after second last batch
            if batch_number == num_batches:
                last_elem = total_rows
            else:
                last_elem = start + batch_size

            pdf = dt_obj.df.iloc[start:last_elem]

            # Convert the batch to a list of lists. Element 0 of every tuple
            # yielded by itertuples(index=True) is the index; the column
            # values follow it.
            insert_list = []
            for row in pdf.itertuples(index=True):
                values = list(row[1:])
                if include_index:
                    if is_multi_index:
                        # A MultiIndex entry is a tuple: one value per index level.
                        values.extend(row[0])
                    else:
                        values.append(row[0])
                insert_list.append(values)

            # Execute insert statement.
            cur.execute(ins, insert_list)

            # Get error and warning information from cursor.
            err, _ = dt_obj._process_fastexport_errors_warnings(ins)
            if len(err) != 0:
                err_dict['batch_no'].extend([batch_number] * len(err))
                err_dict['error_message'].extend(err)

            print("Processed {} rows in batch {}.".format(pdf.shape[0], batch_number))

            # Stop once the batch that reaches the end of the DataFrame is loaded;
            # its leftover rows must not be loaded again by another iteration.
            if last_elem == total_rows:
                break

            batch_number += 1

        # Get logon sequence number to be used for error/warning table names
        logon_seq_number = dt_obj._process_escape_functions(
            cur,
            escape_function=DriverEscapeFunctions.LOGON_SEQ_NUM,
            insert_query=ins)
        # Commit the rows
        conn.commit()

        # Get error and warning information, if any.
        # Errors/Warnings like duplicate rows are added here.
        _, warn = dt_obj._process_fastexport_errors_warnings(ins)
        if len(warn) != 0:
            warn_dict['batch_no'].extend(['batch_summary'] * len(warn))
            warn_dict['error_message'].extend(warn)

        # Get error and warning information for error and warning tables, persist
        # error and warning tables to Vantage if user has specified save_error as True
        # else show it as pandas dataframe on console.
        pd_err_df = dt_obj._get_pandas_df_from_errors_warnings(err_dict)
        pd_warn_df = dt_obj._get_pandas_df_from_errors_warnings(warn_dict)

        # Create persistent tables using pandas df if
        # save_errors=True or
        # tables names for errors or warning are provided by user.
        if (dt_obj.save_errors or dt_obj.err_tbl_name) and not pd_err_df.empty:
            error_tablename = dt_obj._create_error_warnings_table(pd_err_df, "err",
                                                                  logon_seq_number[0][0],
                                                                  dt_obj.err_tbl_name)
        if (dt_obj.save_errors or dt_obj.warn_tbl_name) and not pd_warn_df.empty:
            warn_tablename = dt_obj._create_error_warnings_table(pd_warn_df, "warn",
                                                                 logon_seq_number[0][0],
                                                                 dt_obj.warn_tbl_name)

        # Generate ERR_1 and ERR_2 table names if save_errors=True and
        # errors are not stored in user provided error table name.
        if dt_obj.save_errors and not dt_obj.err_tbl_name:
            # Resolve the database once; both names share it.
            err_db = dt_obj.err_staging_db or _get_current_databasename()
            err_1_table = "{}.{}{}".format(err_db, table_name,
                                           dt_obj.err_tbl_1_suffix or "_ERR_1")
            err_2_table = "{}.{}{}".format(err_db, table_name,
                                           dt_obj.err_tbl_2_suffix or "_ERR_2")
        else:
            err_1_table = ""
            err_2_table = ""

    except Exception:
        conn.rollback()
        raise
    finally:
        try:
            # Turn on autocommit.
            dt_obj._process_escape_functions(cur, escape_function=DriverEscapeFunctions.AUTOCOMMIT_ON)
        finally:
            # Close the cursor even when re-enabling autocommit fails,
            # so that the cursor is never leaked.
            cur.close()

    return {"errors_dataframe": pd_err_df, "warnings_dataframe": pd_warn_df,
            "errors_table": error_tablename, "warnings_table": warn_tablename,
            "ERR_1_table": err_1_table, "ERR_2_table": err_2_table}
707
+
708
+
709
+ def _get_batchsize(df):
710
+ """
711
+ This internal function calculates batch size which should be more than 100000
712
+ for better fastload performance.
713
+
714
+ PARAMETERS:
715
+ df:
716
+ The Pandas DataFrame object for which the batch size has to be calculated.
717
+ Types: pandas.DataFrame
718
+
719
+ RETURNS:
720
+ Batch size i.e. number of rows to be inserted in a batch.
721
+
722
+ RAISES:
723
+ N/A
724
+
725
+ EXAMPLES:
726
+ _get_batchsize(df)
727
+ """
728
+ return df.shape[0] if df.shape[0] <= 100000 else round(df.shape[0]/int(df.shape[0]/100000))
729
+
730
+
731
+ def _create_table_for_fastload(df, con, table_name, schema_name=None, primary_index=None,
732
+ is_pti=False, primary_time_index_name=None, timecode_column=None,
733
+ timezero_date=None, timebucket_duration=None, sequence_column=None,
734
+ seq_max=None, columns_list=[], types=None, index=False,
735
+ index_label=None):
736
+ """
737
+ PARAMETERS:
738
+ df:
739
+ Specifies the Pandas DataFrame object to be saved.
740
+ Types: pandas.DataFrame
741
+
742
+ con:
743
+ A SQLAlchemy connectable (engine/connection) object
744
+ Types: Teradata connection object
745
+
746
+ table_name:
747
+ Specifies the name of the table to be created in Vantage.
748
+ Types: String
749
+
750
+ schema_name:
751
+ Specifies the name of the database schema in Teradata Vantage to write to.
752
+ Types: String
753
+
754
+ index:
755
+ Specifies whether to save Pandas DataFrame index as a column or not.
756
+ Types: Boolean (True or False)
757
+
758
+ index_label:
759
+ Specifies the column label(s) for Pandas DataFrame index column(s).
760
+ Types: String or list of strings
761
+
762
+ primary_index:
763
+ Specifies which column(s) to use as primary index while creating Teradata
764
+ table in Vantage. When None, No Primary Index Teradata tables are created.
765
+ Types: String or list of strings
766
+
767
+ types:
768
+ Specifies required data-types for requested columns to be saved in Vantage.
769
+ Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
770
+
771
+ RETURNS:
772
+ Table object
773
+
774
+ RAISES:
775
+ TeradataMlException, sqlalchemy.OperationalError
776
+
777
+ EXAMPLES:
778
+ _create_table_for_fastload(df, con, table_name, schema_name, primary_index,
779
+ is_pti, primary_time_index_name, timecode_column,
780
+ timezero_date, timebucket_duration, sequence_column,
781
+ seq_max, columns_list, types, index, index_label)
782
+ """
783
+ if is_pti:
784
+ table = _create_pti_table_object(df=df, con=con, table_name=table_name,
785
+ schema_name=schema_name, temporary=False,
786
+ primary_time_index_name=primary_time_index_name,
787
+ timecode_column=timecode_column, timezero_date=timezero_date,
788
+ timebucket_duration=timebucket_duration,
789
+ sequence_column=sequence_column, seq_max=seq_max,
790
+ columns_list=columns_list, set_table=False,
791
+ types=types, index=index, index_label=index_label)
792
+
793
+ UtilFuncs._create_table_using_table_object(table)
794
+