teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in one of the supported registries. It is provided for informational purposes only.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (1303)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1935 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2040 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +798 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1683 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +1011 -0
  31. teradataml/automl/data_transformation.py +789 -0
  32. teradataml/automl/feature_engineering.py +1580 -0
  33. teradataml/automl/feature_exploration.py +554 -0
  34. teradataml/automl/model_evaluation.py +151 -0
  35. teradataml/automl/model_training.py +1026 -0
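The eight `teradataml/automl/` files above are entirely new in 20.0.0.1 and add an in-database AutoML workflow (feature exploration, feature engineering, data preparation, model training, and model evaluation). A minimal sketch of how the package is typically driven, assuming the `AutoML` class exported by teradataml 20.0 keeps its documented fit/predict shape; the connection parameters and the table/column names (`housing_train`, `housing_test`, `price`) are placeholders:

```python
# Minimal AutoML sketch, assuming teradataml 20.0's documented API surface.
# Host credentials and the table/column names below are placeholders.
from teradataml import create_context, DataFrame, AutoML

create_context(host="<host>", username="<user>", password="<password>")

train = DataFrame("housing_train")        # existing table in Vantage
automl = AutoML(task_type="Regression")   # "Classification" is the other documented task
automl.fit(train, train.price)            # feature engineering + training run in-database
ranked = automl.leaderboard()             # compare the trained candidate models
pred = automl.predict(DataFrame("housing_test"))
```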
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/auth_client.py +133 -0
  41. teradataml/clients/pkce_client.py +481 -481
  42. teradataml/common/aed_utils.py +7 -2
  43. teradataml/common/bulk_exposed_utils.py +111 -111
  44. teradataml/common/constants.py +1438 -1441
  45. teradataml/common/deprecations.py +160 -0
  46. teradataml/common/exceptions.py +73 -73
  47. teradataml/common/formula.py +742 -742
  48. teradataml/common/garbagecollector.py +597 -635
  49. teradataml/common/messagecodes.py +424 -431
  50. teradataml/common/messages.py +228 -231
  51. teradataml/common/sqlbundle.py +693 -693
  52. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  53. teradataml/common/utils.py +2424 -2500
  54. teradataml/common/warnings.py +25 -25
  55. teradataml/common/wrapper_utils.py +1 -110
  56. teradataml/config/dummy_file1.cfg +4 -4
  57. teradataml/config/dummy_file2.cfg +2 -2
  58. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  59. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  60. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  61. teradataml/context/aed_context.py +217 -217
  62. teradataml/context/context.py +1091 -999
  63. teradataml/data/A_loan.csv +19 -19
  64. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  65. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  66. teradataml/data/B_loan.csv +49 -49
  67. teradataml/data/BuoyData2.csv +17 -17
  68. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  69. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  70. teradataml/data/Convolve2RealsLeft.csv +5 -5
  71. teradataml/data/Convolve2RealsRight.csv +5 -5
  72. teradataml/data/Convolve2ValidLeft.csv +11 -11
  73. teradataml/data/Convolve2ValidRight.csv +11 -11
  74. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  75. teradataml/data/Orders1_12mf.csv +24 -24
  76. teradataml/data/Pi_loan.csv +7 -7
  77. teradataml/data/SMOOTHED_DATA.csv +7 -7
  78. teradataml/data/TestDFFT8.csv +9 -9
  79. teradataml/data/TestRiver.csv +109 -109
  80. teradataml/data/Traindata.csv +28 -28
  81. teradataml/data/acf.csv +17 -17
  82. teradataml/data/adaboost_example.json +34 -34
  83. teradataml/data/adaboostpredict_example.json +24 -24
  84. teradataml/data/additional_table.csv +10 -10
  85. teradataml/data/admissions_test.csv +21 -21
  86. teradataml/data/admissions_train.csv +41 -41
  87. teradataml/data/admissions_train_nulls.csv +41 -41
  88. teradataml/data/advertising.csv +201 -0
  89. teradataml/data/ageandheight.csv +13 -13
  90. teradataml/data/ageandpressure.csv +31 -31
  91. teradataml/data/antiselect_example.json +36 -36
  92. teradataml/data/antiselect_input.csv +8 -8
  93. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  94. teradataml/data/applicant_external.csv +6 -6
  95. teradataml/data/applicant_reference.csv +6 -6
  96. teradataml/data/arima_example.json +9 -9
  97. teradataml/data/assortedtext_input.csv +8 -8
  98. teradataml/data/attribution_example.json +33 -33
  99. teradataml/data/attribution_sample_table.csv +27 -27
  100. teradataml/data/attribution_sample_table1.csv +6 -6
  101. teradataml/data/attribution_sample_table2.csv +11 -11
  102. teradataml/data/bank_churn.csv +10001 -0
  103. teradataml/data/bank_marketing.csv +11163 -0
  104. teradataml/data/bank_web_clicks1.csv +42 -42
  105. teradataml/data/bank_web_clicks2.csv +91 -91
  106. teradataml/data/bank_web_url.csv +85 -85
  107. teradataml/data/barrier.csv +2 -2
  108. teradataml/data/barrier_new.csv +3 -3
  109. teradataml/data/betweenness_example.json +13 -13
  110. teradataml/data/bike_sharing.csv +732 -0
  111. teradataml/data/bin_breaks.csv +8 -8
  112. teradataml/data/bin_fit_ip.csv +3 -3
  113. teradataml/data/binary_complex_left.csv +11 -11
  114. teradataml/data/binary_complex_right.csv +11 -11
  115. teradataml/data/binary_matrix_complex_left.csv +21 -21
  116. teradataml/data/binary_matrix_complex_right.csv +21 -21
  117. teradataml/data/binary_matrix_real_left.csv +21 -21
  118. teradataml/data/binary_matrix_real_right.csv +21 -21
  119. teradataml/data/blood2ageandweight.csv +26 -26
  120. teradataml/data/bmi.csv +501 -0
  121. teradataml/data/boston.csv +507 -507
  122. teradataml/data/boston2cols.csv +721 -0
  123. teradataml/data/breast_cancer.csv +570 -0
  124. teradataml/data/buoydata_mix.csv +11 -11
  125. teradataml/data/burst_data.csv +5 -5
  126. teradataml/data/burst_example.json +20 -20
  127. teradataml/data/byom_example.json +17 -17
  128. teradataml/data/bytes_table.csv +3 -3
  129. teradataml/data/cal_housing_ex_raw.csv +70 -70
  130. teradataml/data/callers.csv +7 -7
  131. teradataml/data/calls.csv +10 -10
  132. teradataml/data/cars_hist.csv +33 -33
  133. teradataml/data/cat_table.csv +24 -24
  134. teradataml/data/ccm_example.json +31 -31
  135. teradataml/data/ccm_input.csv +91 -91
  136. teradataml/data/ccm_input2.csv +13 -13
  137. teradataml/data/ccmexample.csv +101 -101
  138. teradataml/data/ccmprepare_example.json +8 -8
  139. teradataml/data/ccmprepare_input.csv +91 -91
  140. teradataml/data/cfilter_example.json +12 -12
  141. teradataml/data/changepointdetection_example.json +18 -18
  142. teradataml/data/changepointdetectionrt_example.json +8 -8
  143. teradataml/data/chi_sq.csv +2 -2
  144. teradataml/data/churn_data.csv +14 -14
  145. teradataml/data/churn_emission.csv +35 -35
  146. teradataml/data/churn_initial.csv +3 -3
  147. teradataml/data/churn_state_transition.csv +5 -5
  148. teradataml/data/citedges_2.csv +745 -745
  149. teradataml/data/citvertices_2.csv +1210 -1210
  150. teradataml/data/clicks2.csv +16 -16
  151. teradataml/data/clickstream.csv +12 -12
  152. teradataml/data/clickstream1.csv +11 -11
  153. teradataml/data/closeness_example.json +15 -15
  154. teradataml/data/complaints.csv +21 -21
  155. teradataml/data/complaints_mini.csv +3 -3
  156. teradataml/data/complaints_testtoken.csv +224 -224
  157. teradataml/data/complaints_tokens_test.csv +353 -353
  158. teradataml/data/complaints_traintoken.csv +472 -472
  159. teradataml/data/computers_category.csv +1001 -1001
  160. teradataml/data/computers_test1.csv +1252 -1252
  161. teradataml/data/computers_train1.csv +5009 -5009
  162. teradataml/data/computers_train1_clustered.csv +5009 -5009
  163. teradataml/data/confusionmatrix_example.json +9 -9
  164. teradataml/data/conversion_event_table.csv +3 -3
  165. teradataml/data/corr_input.csv +17 -17
  166. teradataml/data/correlation_example.json +11 -11
  167. teradataml/data/coxhazardratio_example.json +39 -39
  168. teradataml/data/coxph_example.json +15 -15
  169. teradataml/data/coxsurvival_example.json +28 -28
  170. teradataml/data/cpt.csv +41 -41
  171. teradataml/data/credit_ex_merged.csv +45 -45
  172. teradataml/data/customer_loyalty.csv +301 -301
  173. teradataml/data/customer_loyalty_newseq.csv +31 -31
  174. teradataml/data/customer_segmentation_test.csv +2628 -0
  175. teradataml/data/customer_segmentation_train.csv +8069 -0
  176. teradataml/data/dataframe_example.json +146 -146
  177. teradataml/data/decisionforest_example.json +37 -37
  178. teradataml/data/decisionforestpredict_example.json +38 -38
  179. teradataml/data/decisiontree_example.json +21 -21
  180. teradataml/data/decisiontreepredict_example.json +45 -45
  181. teradataml/data/dfft2_size4_real.csv +17 -17
  182. teradataml/data/dfft2_test_matrix16.csv +17 -17
  183. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  184. teradataml/data/diabetes.csv +443 -443
  185. teradataml/data/diabetes_test.csv +89 -89
  186. teradataml/data/dict_table.csv +5 -5
  187. teradataml/data/docperterm_table.csv +4 -4
  188. teradataml/data/docs/__init__.py +1 -1
  189. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  190. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  191. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  192. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  193. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  194. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  195. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  196. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  197. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  198. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  199. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  200. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  201. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  202. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  203. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  204. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  205. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  206. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  207. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  208. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  209. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  210. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  211. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  212. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  213. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  214. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  215. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  216. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  217. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
  218. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
  219. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  220. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
  221. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  222. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  223. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  224. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  225. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  226. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  227. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  228. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  229. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  230. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  231. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  232. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  233. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  234. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  235. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  236. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  237. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  238. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  239. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  240. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  241. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  242. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
  243. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  244. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  245. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  246. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  247. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  248. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  249. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  250. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  251. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
  252. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  253. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  254. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  255. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  256. teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
  257. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  258. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  259. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
  260. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  261. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  262. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  263. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
  264. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  265. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  266. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  267. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
  268. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  269. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  270. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  271. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  272. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  273. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  274. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  275. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  276. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
  277. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  278. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  279. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  280. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  281. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
  282. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
  283. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  284. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  285. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  286. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  287. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  288. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  289. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  290. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  291. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
  292. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  293. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  294. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  295. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  296. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  297. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  298. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  299. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  300. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
  301. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  302. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
  303. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
  304. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  305. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  306. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  307. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  308. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  309. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  310. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  311. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  312. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
  313. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  314. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  315. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  316. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  317. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  318. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  319. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  320. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  321. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  322. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  323. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  324. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
  325. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
  326. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
  327. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  328. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  329. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  330. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  331. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  332. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  333. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  334. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  335. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  336. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  337. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  338. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  339. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  340. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  341. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  342. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  343. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  344. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  345. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  346. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  347. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  348. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  349. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  350. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  351. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  352. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  353. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  354. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  355. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  356. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  357. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  358. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  359. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  360. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  361. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  362. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  363. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  364. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  365. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  366. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  367. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  368. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  369. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  370. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  371. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  372. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  373. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  374. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  375. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  376. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  377. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  378. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  379. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  380. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  381. teradataml/data/dtw_example.json +17 -17
  382. teradataml/data/dtw_t1.csv +11 -11
  383. teradataml/data/dtw_t2.csv +4 -4
  384. teradataml/data/dwt2d_example.json +15 -15
  385. teradataml/data/dwt_example.json +14 -14
  386. teradataml/data/dwt_filter_dim.csv +5 -5
  387. teradataml/data/emission.csv +9 -9
  388. teradataml/data/emp_table_by_dept.csv +19 -19
  389. teradataml/data/employee_info.csv +4 -4
  390. teradataml/data/employee_table.csv +6 -6
  391. teradataml/data/excluding_event_table.csv +2 -2
  392. teradataml/data/finance_data.csv +6 -6
  393. teradataml/data/finance_data2.csv +61 -61
  394. teradataml/data/finance_data3.csv +93 -93
  395. teradataml/data/fish.csv +160 -0
  396. teradataml/data/fm_blood2ageandweight.csv +26 -26
  397. teradataml/data/fmeasure_example.json +11 -11
  398. teradataml/data/followers_leaders.csv +10 -10
  399. teradataml/data/fpgrowth_example.json +12 -12
  400. teradataml/data/frequentpaths_example.json +29 -29
  401. teradataml/data/friends.csv +9 -9
  402. teradataml/data/fs_input.csv +33 -33
  403. teradataml/data/fs_input1.csv +33 -33
  404. teradataml/data/genData.csv +513 -513
  405. teradataml/data/geodataframe_example.json +39 -39
  406. teradataml/data/glass_types.csv +215 -0
  407. teradataml/data/glm_admissions_model.csv +12 -12
  408. teradataml/data/glm_example.json +56 -29
  409. teradataml/data/glml1l2_example.json +28 -28
  410. teradataml/data/glml1l2predict_example.json +54 -54
  411. teradataml/data/glmpredict_example.json +54 -54
  412. teradataml/data/gq_t1.csv +21 -21
  413. teradataml/data/hconvolve_complex_right.csv +5 -5
  414. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  415. teradataml/data/histogram_example.json +11 -11
  416. teradataml/data/hmmdecoder_example.json +78 -78
  417. teradataml/data/hmmevaluator_example.json +24 -24
  418. teradataml/data/hmmsupervised_example.json +10 -10
  419. teradataml/data/hmmunsupervised_example.json +7 -7
  420. teradataml/data/house_values.csv +12 -12
  421. teradataml/data/house_values2.csv +13 -13
  422. teradataml/data/housing_cat.csv +7 -7
  423. teradataml/data/housing_data.csv +9 -9
  424. teradataml/data/housing_test.csv +47 -47
  425. teradataml/data/housing_test_binary.csv +47 -47
  426. teradataml/data/housing_train.csv +493 -493
  427. teradataml/data/housing_train_attribute.csv +4 -4
  428. teradataml/data/housing_train_binary.csv +437 -437
  429. teradataml/data/housing_train_parameter.csv +2 -2
  430. teradataml/data/housing_train_response.csv +493 -493
  431. teradataml/data/housing_train_segment.csv +201 -0
  432. teradataml/data/ibm_stock.csv +370 -370
  433. teradataml/data/ibm_stock1.csv +370 -370
  434. teradataml/data/identitymatch_example.json +21 -21
  435. teradataml/data/idf_table.csv +4 -4
  436. teradataml/data/impressions.csv +101 -101
  437. teradataml/data/inflation.csv +21 -21
  438. teradataml/data/initial.csv +3 -3
  439. teradataml/data/insect2Cols.csv +61 -0
  440. teradataml/data/insect_sprays.csv +12 -12
  441. teradataml/data/insurance.csv +1339 -1339
  442. teradataml/data/interpolator_example.json +12 -12
  443. teradataml/data/iris_altinput.csv +481 -481
  444. teradataml/data/iris_attribute_output.csv +8 -8
  445. teradataml/data/iris_attribute_test.csv +121 -121
  446. teradataml/data/iris_attribute_train.csv +481 -481
  447. teradataml/data/iris_category_expect_predict.csv +31 -31
  448. teradataml/data/iris_data.csv +151 -0
  449. teradataml/data/iris_input.csv +151 -151
  450. teradataml/data/iris_response_train.csv +121 -121
  451. teradataml/data/iris_test.csv +31 -31
  452. teradataml/data/iris_train.csv +121 -121
  453. teradataml/data/join_table1.csv +4 -4
  454. teradataml/data/join_table2.csv +4 -4
  455. teradataml/data/jsons/anly_function_name.json +6 -6
  456. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  457. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  458. teradataml/data/jsons/byom/h2opredict.json +194 -194
  459. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  460. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  461. teradataml/data/jsons/paired_functions.json +435 -435
  462. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  463. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  464. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  465. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  466. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  467. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  468. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  469. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  470. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  471. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  472. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  473. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  474. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  475. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  476. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  477. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  478. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  479. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  480. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  481. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  482. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  483. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  484. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  485. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  486. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  487. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  488. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  489. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  490. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  491. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  492. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  493. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  494. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  495. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  496. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  497. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  498. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  499. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  500. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  501. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  502. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  503. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  504. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  505. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  506. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  507. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  508. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  509. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  510. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  511. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  512. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  513. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  514. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  515. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  516. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  517. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  518. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  519. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  520. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  521. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  522. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  523. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  524. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  525. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  526. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  527. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  528. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  529. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  531. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  532. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  533. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  534. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  535. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  536. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  537. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  539. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  540. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  541. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  542. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  543. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  544. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  545. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  546. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  547. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  548. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  549. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  550. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  551. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  552. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  553. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  554. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  555. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  556. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  557. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  558. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  559. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  560. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  561. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  562. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  563. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  564. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  565. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  566. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  567. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  568. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
  569. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  570. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  571. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  572. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  573. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  574. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  575. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  576. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  577. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  578. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  579. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
  580. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  581. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  582. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  583. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
  584. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
  585. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  586. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  587. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
  588. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  589. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  590. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  591. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
  592. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  593. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  594. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  595. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
  596. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  597. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  598. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  599. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  600. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  601. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  602. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  603. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  604. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  605. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  606. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  607. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  608. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  609. teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
  610. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  611. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  612. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  613. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  614. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  615. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  616. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  617. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  618. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
  619. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
  620. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
  621. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  622. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  623. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  624. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  625. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  626. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  627. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  628. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  629. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  630. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  631. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  632. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  633. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  634. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  635. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
  636. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
  637. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
  638. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  639. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  640. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  641. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  642. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  643. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  644. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  645. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  646. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  647. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  648. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  649. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  650. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  651. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  653. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  654. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  655. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  656. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  657. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  658. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  659. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  660. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  661. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  662. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  663. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  664. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  665. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  666. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  667. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  668. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  669. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  670. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  671. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  672. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  673. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  674. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  675. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  676. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  677. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  678. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  679. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  680. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  681. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  682. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  683. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  684. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  685. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  686. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  687. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  688. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  689. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  690. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  691. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  692. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  693. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  694. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  695. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  696. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  697. teradataml/data/kmeans_example.json +22 -17
  698. teradataml/data/kmeans_table.csv +10 -0
  699. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  700. teradataml/data/knn_example.json +18 -18
  701. teradataml/data/knnrecommender_example.json +6 -6
  702. teradataml/data/knnrecommenderpredict_example.json +12 -12
  703. teradataml/data/lar_example.json +17 -17
  704. teradataml/data/larpredict_example.json +30 -30
  705. teradataml/data/lc_new_predictors.csv +5 -5
  706. teradataml/data/lc_new_reference.csv +9 -9
  707. teradataml/data/lda_example.json +8 -8
  708. teradataml/data/ldainference_example.json +14 -14
  709. teradataml/data/ldatopicsummary_example.json +8 -8
  710. teradataml/data/levendist_input.csv +13 -13
  711. teradataml/data/levenshteindistance_example.json +10 -10
  712. teradataml/data/linreg_example.json +9 -9
  713. teradataml/data/load_example_data.py +326 -323
  714. teradataml/data/loan_prediction.csv +295 -295
  715. teradataml/data/lungcancer.csv +138 -138
  716. teradataml/data/mappingdata.csv +12 -12
  717. teradataml/data/milk_timeseries.csv +157 -157
  718. teradataml/data/min_max_titanic.csv +4 -4
  719. teradataml/data/minhash_example.json +6 -6
  720. teradataml/data/ml_ratings.csv +7547 -7547
  721. teradataml/data/ml_ratings_10.csv +2445 -2445
  722. teradataml/data/model1_table.csv +5 -5
  723. teradataml/data/model2_table.csv +5 -5
  724. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  725. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  726. teradataml/data/modularity_example.json +12 -12
  727. teradataml/data/movavg_example.json +7 -7
  728. teradataml/data/mtx1.csv +7 -7
  729. teradataml/data/mtx2.csv +13 -13
  730. teradataml/data/multi_model_classification.csv +401 -0
  731. teradataml/data/multi_model_regression.csv +401 -0
  732. teradataml/data/mvdfft8.csv +9 -9
  733. teradataml/data/naivebayes_example.json +9 -9
  734. teradataml/data/naivebayespredict_example.json +19 -19
  735. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  736. teradataml/data/naivebayestextclassifier_example.json +8 -8
  737. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  738. teradataml/data/name_Find_configure.csv +10 -10
  739. teradataml/data/namedentityfinder_example.json +14 -14
  740. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  741. teradataml/data/namedentityfindertrainer_example.json +6 -6
  742. teradataml/data/nb_iris_input_test.csv +31 -31
  743. teradataml/data/nb_iris_input_train.csv +121 -121
  744. teradataml/data/nbp_iris_model.csv +13 -13
  745. teradataml/data/ner_extractor_text.csv +2 -2
  746. teradataml/data/ner_sports_test2.csv +29 -29
  747. teradataml/data/ner_sports_train.csv +501 -501
  748. teradataml/data/nerevaluator_example.json +5 -5
  749. teradataml/data/nerextractor_example.json +18 -18
  750. teradataml/data/nermem_sports_test.csv +17 -17
  751. teradataml/data/nermem_sports_train.csv +50 -50
  752. teradataml/data/nertrainer_example.json +6 -6
  753. teradataml/data/ngrams_example.json +6 -6
  754. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  755. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  756. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  757. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  758. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  759. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  760. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  761. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  762. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  763. teradataml/data/npath_example.json +23 -23
  764. teradataml/data/ntree_example.json +14 -14
  765. teradataml/data/numeric_strings.csv +4 -4
  766. teradataml/data/numerics.csv +4 -4
  767. teradataml/data/ocean_buoy.csv +17 -17
  768. teradataml/data/ocean_buoy2.csv +17 -17
  769. teradataml/data/ocean_buoys.csv +27 -27
  770. teradataml/data/ocean_buoys2.csv +10 -10
  771. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  772. teradataml/data/ocean_buoys_seq.csv +29 -29
  773. teradataml/data/onehot_encoder_train.csv +4 -0
  774. teradataml/data/openml_example.json +92 -0
  775. teradataml/data/optional_event_table.csv +4 -4
  776. teradataml/data/orders1.csv +11 -11
  777. teradataml/data/orders1_12.csv +12 -12
  778. teradataml/data/orders_ex.csv +4 -4
  779. teradataml/data/pack_example.json +8 -8
  780. teradataml/data/package_tracking.csv +19 -19
  781. teradataml/data/package_tracking_pti.csv +18 -18
  782. teradataml/data/pagerank_example.json +13 -13
  783. teradataml/data/paragraphs_input.csv +6 -6
  784. teradataml/data/pathanalyzer_example.json +7 -7
  785. teradataml/data/pathgenerator_example.json +7 -7
  786. teradataml/data/phrases.csv +7 -7
  787. teradataml/data/pivot_example.json +8 -8
  788. teradataml/data/pivot_input.csv +22 -22
  789. teradataml/data/playerRating.csv +31 -31
  790. teradataml/data/postagger_example.json +6 -6
  791. teradataml/data/posttagger_output.csv +44 -44
  792. teradataml/data/production_data.csv +16 -16
  793. teradataml/data/production_data2.csv +7 -7
  794. teradataml/data/randomsample_example.json +31 -31
  795. teradataml/data/randomwalksample_example.json +8 -8
  796. teradataml/data/rank_table.csv +6 -6
  797. teradataml/data/ref_mobile_data.csv +4 -4
  798. teradataml/data/ref_mobile_data_dense.csv +2 -2
  799. teradataml/data/ref_url.csv +17 -17
  800. teradataml/data/restaurant_reviews.csv +7 -7
  801. teradataml/data/river_data.csv +145 -145
  802. teradataml/data/roc_example.json +7 -7
  803. teradataml/data/roc_input.csv +101 -101
  804. teradataml/data/rule_inputs.csv +6 -6
  805. teradataml/data/rule_table.csv +2 -2
  806. teradataml/data/sales.csv +7 -7
  807. teradataml/data/sales_transaction.csv +501 -501
  808. teradataml/data/salesdata.csv +342 -342
  809. teradataml/data/sample_cities.csv +2 -2
  810. teradataml/data/sample_shapes.csv +10 -10
  811. teradataml/data/sample_streets.csv +2 -2
  812. teradataml/data/sampling_example.json +15 -15
  813. teradataml/data/sax_example.json +8 -8
  814. teradataml/data/scale_attributes.csv +3 -0
  815. teradataml/data/scale_example.json +74 -23
  816. teradataml/data/scale_housing.csv +11 -11
  817. teradataml/data/scale_housing_test.csv +6 -6
  818. teradataml/data/scale_input_part_sparse.csv +31 -0
  819. teradataml/data/scale_input_partitioned.csv +16 -0
  820. teradataml/data/scale_input_sparse.csv +11 -0
  821. teradataml/data/scale_parameters.csv +3 -0
  822. teradataml/data/scale_stat.csv +11 -11
  823. teradataml/data/scalebypartition_example.json +13 -13
  824. teradataml/data/scalemap_example.json +13 -13
  825. teradataml/data/scalesummary_example.json +12 -12
  826. teradataml/data/score_category.csv +101 -101
  827. teradataml/data/score_summary.csv +4 -4
  828. teradataml/data/script_example.json +9 -9
  829. teradataml/data/scripts/deploy_script.py +84 -0
  830. teradataml/data/scripts/mapper.R +20 -0
  831. teradataml/data/scripts/mapper.py +15 -15
  832. teradataml/data/scripts/mapper_replace.py +15 -15
  833. teradataml/data/scripts/sklearn/__init__.py +0 -0
  834. teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
  835. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
  836. teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
  837. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
  838. teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
  839. teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
  840. teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
  841. teradataml/data/seeds.csv +10 -10
  842. teradataml/data/sentenceextractor_example.json +6 -6
  843. teradataml/data/sentiment_extract_input.csv +11 -11
  844. teradataml/data/sentiment_train.csv +16 -16
  845. teradataml/data/sentiment_word.csv +20 -20
  846. teradataml/data/sentiment_word_input.csv +19 -19
  847. teradataml/data/sentimentextractor_example.json +24 -24
  848. teradataml/data/sentimenttrainer_example.json +8 -8
  849. teradataml/data/sequence_table.csv +10 -10
  850. teradataml/data/seriessplitter_example.json +7 -7
  851. teradataml/data/sessionize_example.json +17 -17
  852. teradataml/data/sessionize_table.csv +116 -116
  853. teradataml/data/setop_test1.csv +24 -24
  854. teradataml/data/setop_test2.csv +22 -22
  855. teradataml/data/soc_nw_edges.csv +10 -10
  856. teradataml/data/soc_nw_vertices.csv +7 -7
  857. teradataml/data/souvenir_timeseries.csv +167 -167
  858. teradataml/data/sparse_iris_attribute.csv +5 -5
  859. teradataml/data/sparse_iris_test.csv +121 -121
  860. teradataml/data/sparse_iris_train.csv +601 -601
  861. teradataml/data/star1.csv +6 -6
  862. teradataml/data/state_transition.csv +5 -5
  863. teradataml/data/stock_data.csv +53 -53
  864. teradataml/data/stock_movement.csv +11 -11
  865. teradataml/data/stock_vol.csv +76 -76
  866. teradataml/data/stop_words.csv +8 -8
  867. teradataml/data/store_sales.csv +37 -37
  868. teradataml/data/stringsimilarity_example.json +7 -7
  869. teradataml/data/strsimilarity_input.csv +13 -13
  870. teradataml/data/students.csv +101 -101
  871. teradataml/data/svm_iris_input_test.csv +121 -121
  872. teradataml/data/svm_iris_input_train.csv +481 -481
  873. teradataml/data/svm_iris_model.csv +7 -7
  874. teradataml/data/svmdense_example.json +9 -9
  875. teradataml/data/svmdensepredict_example.json +18 -18
  876. teradataml/data/svmsparse_example.json +7 -7
  877. teradataml/data/svmsparsepredict_example.json +13 -13
  878. teradataml/data/svmsparsesummary_example.json +7 -7
  879. teradataml/data/target_mobile_data.csv +13 -13
  880. teradataml/data/target_mobile_data_dense.csv +5 -5
  881. teradataml/data/templatedata.csv +1201 -1201
  882. teradataml/data/templates/open_source_ml.json +9 -0
  883. teradataml/data/teradataml_example.json +150 -1
  884. teradataml/data/test_classification.csv +101 -0
  885. teradataml/data/test_loan_prediction.csv +53 -53
  886. teradataml/data/test_pacf_12.csv +37 -37
  887. teradataml/data/test_prediction.csv +101 -0
  888. teradataml/data/test_regression.csv +101 -0
  889. teradataml/data/test_river2.csv +109 -109
  890. teradataml/data/text_inputs.csv +6 -6
  891. teradataml/data/textchunker_example.json +7 -7
  892. teradataml/data/textclassifier_example.json +6 -6
  893. teradataml/data/textclassifier_input.csv +7 -7
  894. teradataml/data/textclassifiertrainer_example.json +6 -6
  895. teradataml/data/textmorph_example.json +5 -5
  896. teradataml/data/textparser_example.json +15 -15
  897. teradataml/data/texttagger_example.json +11 -11
  898. teradataml/data/texttokenizer_example.json +6 -6
  899. teradataml/data/texttrainer_input.csv +11 -11
  900. teradataml/data/tf_example.json +6 -6
  901. teradataml/data/tfidf_example.json +13 -13
  902. teradataml/data/tfidf_input1.csv +201 -201
  903. teradataml/data/tfidf_train.csv +6 -6
  904. teradataml/data/time_table1.csv +535 -535
  905. teradataml/data/time_table2.csv +14 -14
  906. teradataml/data/timeseriesdata.csv +1601 -1601
  907. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  908. teradataml/data/titanic.csv +892 -892
  909. teradataml/data/token_table.csv +696 -696
  910. teradataml/data/train_multiclass.csv +101 -0
  911. teradataml/data/train_regression.csv +101 -0
  912. teradataml/data/train_regression_multiple_labels.csv +101 -0
  913. teradataml/data/train_tracking.csv +27 -27
  914. teradataml/data/transformation_table.csv +5 -5
  915. teradataml/data/transformation_table_new.csv +1 -1
  916. teradataml/data/tv_spots.csv +16 -16
  917. teradataml/data/twod_climate_data.csv +117 -117
  918. teradataml/data/uaf_example.json +475 -475
  919. teradataml/data/univariatestatistics_example.json +8 -8
  920. teradataml/data/unpack_example.json +9 -9
  921. teradataml/data/unpivot_example.json +9 -9
  922. teradataml/data/unpivot_input.csv +8 -8
  923. teradataml/data/us_air_pass.csv +36 -36
  924. teradataml/data/us_population.csv +624 -624
  925. teradataml/data/us_states_shapes.csv +52 -52
  926. teradataml/data/varmax_example.json +17 -17
  927. teradataml/data/vectordistance_example.json +25 -25
  928. teradataml/data/ville_climatedata.csv +121 -121
  929. teradataml/data/ville_tempdata.csv +12 -12
  930. teradataml/data/ville_tempdata1.csv +12 -12
  931. teradataml/data/ville_temperature.csv +11 -11
  932. teradataml/data/waveletTable.csv +1605 -1605
  933. teradataml/data/waveletTable2.csv +1605 -1605
  934. teradataml/data/weightedmovavg_example.json +8 -8
  935. teradataml/data/wft_testing.csv +5 -5
  936. teradataml/data/wine_data.csv +1600 -0
  937. teradataml/data/word_embed_input_table1.csv +5 -5
  938. teradataml/data/word_embed_input_table2.csv +4 -4
  939. teradataml/data/word_embed_model.csv +22 -22
  940. teradataml/data/words_input.csv +13 -13
  941. teradataml/data/xconvolve_complex_left.csv +6 -6
  942. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  943. teradataml/data/xgboost_example.json +35 -35
  944. teradataml/data/xgboostpredict_example.json +31 -31
  945. teradataml/data/ztest_example.json +16 -0
  946. teradataml/dataframe/copy_to.py +1769 -1698
  947. teradataml/dataframe/data_transfer.py +2812 -2745
  948. teradataml/dataframe/dataframe.py +17630 -16946
  949. teradataml/dataframe/dataframe_utils.py +1875 -1740
  950. teradataml/dataframe/fastload.py +794 -603
  951. teradataml/dataframe/indexer.py +424 -424
  952. teradataml/dataframe/setop.py +1179 -1166
  953. teradataml/dataframe/sql.py +10174 -6432
  954. teradataml/dataframe/sql_function_parameters.py +439 -388
  955. teradataml/dataframe/sql_functions.py +652 -652
  956. teradataml/dataframe/sql_interfaces.py +220 -220
  957. teradataml/dataframe/vantage_function_types.py +674 -630
  958. teradataml/dataframe/window.py +693 -692
  959. teradataml/dbutils/__init__.py +3 -3
  960. teradataml/dbutils/dbutils.py +1167 -1150
  961. teradataml/dbutils/filemgr.py +267 -267
  962. teradataml/gen_ai/__init__.py +2 -2
  963. teradataml/gen_ai/convAI.py +472 -472
  964. teradataml/geospatial/__init__.py +3 -3
  965. teradataml/geospatial/geodataframe.py +1105 -1094
  966. teradataml/geospatial/geodataframecolumn.py +392 -387
  967. teradataml/geospatial/geometry_types.py +925 -925
  968. teradataml/hyperparameter_tuner/__init__.py +1 -1
  969. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  970. teradataml/hyperparameter_tuner/utils.py +281 -187
  971. teradataml/lib/aed_0_1.dll +0 -0
  972. teradataml/lib/libaed_0_1.dylib +0 -0
  973. teradataml/lib/libaed_0_1.so +0 -0
  974. teradataml/libaed_0_1.dylib +0 -0
  975. teradataml/libaed_0_1.so +0 -0
  976. teradataml/opensource/__init__.py +1 -0
  977. teradataml/opensource/sklearn/__init__.py +1 -0
  978. teradataml/opensource/sklearn/_class.py +255 -0
  979. teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
  980. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  981. teradataml/opensource/sklearn/constants.py +54 -0
  982. teradataml/options/__init__.py +130 -124
  983. teradataml/options/configure.py +358 -336
  984. teradataml/options/display.py +176 -176
  985. teradataml/plot/__init__.py +2 -2
  986. teradataml/plot/axis.py +1388 -1388
  987. teradataml/plot/constants.py +15 -15
  988. teradataml/plot/figure.py +398 -398
  989. teradataml/plot/plot.py +760 -760
  990. teradataml/plot/query_generator.py +83 -83
  991. teradataml/plot/subplot.py +216 -216
  992. teradataml/scriptmgmt/UserEnv.py +3791 -3761
  993. teradataml/scriptmgmt/__init__.py +3 -3
  994. teradataml/scriptmgmt/lls_utils.py +1719 -1604
  995. teradataml/series/series.py +532 -532
  996. teradataml/series/series_utils.py +71 -71
  997. teradataml/table_operators/Apply.py +949 -917
  998. teradataml/table_operators/Script.py +1718 -1982
  999. teradataml/table_operators/TableOperator.py +1255 -1616
  1000. teradataml/table_operators/__init__.py +2 -3
  1001. teradataml/table_operators/apply_query_generator.py +262 -262
  1002. teradataml/table_operators/query_generator.py +507 -507
  1003. teradataml/table_operators/table_operator_query_generator.py +460 -460
  1004. teradataml/table_operators/table_operator_util.py +631 -639
  1005. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  1006. teradataml/table_operators/templates/dataframe_map.template +176 -176
  1007. teradataml/table_operators/templates/script_executor.template +170 -170
  1008. teradataml/utils/dtypes.py +684 -684
  1009. teradataml/utils/internal_buffer.py +84 -84
  1010. teradataml/utils/print_versions.py +205 -205
  1011. teradataml/utils/utils.py +410 -410
  1012. teradataml/utils/validators.py +2277 -2115
  1013. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
  1014. teradataml-20.0.0.1.dist-info/RECORD +1056 -0
  1015. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
  1016. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
  1017. teradataml/analytics/mle/AdaBoost.py +0 -651
  1018. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1019. teradataml/analytics/mle/Antiselect.py +0 -342
  1020. teradataml/analytics/mle/Arima.py +0 -641
  1021. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1022. teradataml/analytics/mle/Attribution.py +0 -1070
  1023. teradataml/analytics/mle/Betweenness.py +0 -658
  1024. teradataml/analytics/mle/Burst.py +0 -711
  1025. teradataml/analytics/mle/CCM.py +0 -600
  1026. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1027. teradataml/analytics/mle/CFilter.py +0 -460
  1028. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1029. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1030. teradataml/analytics/mle/Closeness.py +0 -737
  1031. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1032. teradataml/analytics/mle/Correlation.py +0 -477
  1033. teradataml/analytics/mle/Correlation2.py +0 -573
  1034. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1035. teradataml/analytics/mle/CoxPH.py +0 -556
  1036. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1037. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1038. teradataml/analytics/mle/DTW.py +0 -623
  1039. teradataml/analytics/mle/DWT.py +0 -564
  1040. teradataml/analytics/mle/DWT2D.py +0 -599
  1041. teradataml/analytics/mle/DecisionForest.py +0 -716
  1042. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1043. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1044. teradataml/analytics/mle/DecisionTree.py +0 -830
  1045. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1046. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1047. teradataml/analytics/mle/FMeasure.py +0 -402
  1048. teradataml/analytics/mle/FPGrowth.py +0 -734
  1049. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1050. teradataml/analytics/mle/GLM.py +0 -558
  1051. teradataml/analytics/mle/GLML1L2.py +0 -547
  1052. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1053. teradataml/analytics/mle/GLMPredict.py +0 -529
  1054. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1055. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1056. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1057. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1058. teradataml/analytics/mle/Histogram.py +0 -561
  1059. teradataml/analytics/mle/IDWT.py +0 -476
  1060. teradataml/analytics/mle/IDWT2D.py +0 -493
  1061. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1062. teradataml/analytics/mle/Interpolator.py +0 -918
  1063. teradataml/analytics/mle/KMeans.py +0 -485
  1064. teradataml/analytics/mle/KNN.py +0 -627
  1065. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1066. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1067. teradataml/analytics/mle/LAR.py +0 -439
  1068. teradataml/analytics/mle/LARPredict.py +0 -478
  1069. teradataml/analytics/mle/LDA.py +0 -548
  1070. teradataml/analytics/mle/LDAInference.py +0 -492
  1071. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1072. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1073. teradataml/analytics/mle/LinReg.py +0 -433
  1074. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1075. teradataml/analytics/mle/MinHash.py +0 -544
  1076. teradataml/analytics/mle/Modularity.py +0 -587
  1077. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1078. teradataml/analytics/mle/NERExtractor.py +0 -595
  1079. teradataml/analytics/mle/NERTrainer.py +0 -458
  1080. teradataml/analytics/mle/NGrams.py +0 -570
  1081. teradataml/analytics/mle/NPath.py +0 -634
  1082. teradataml/analytics/mle/NTree.py +0 -549
  1083. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1084. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1085. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1086. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1087. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1088. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1089. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1090. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1091. teradataml/analytics/mle/POSTagger.py +0 -417
  1092. teradataml/analytics/mle/Pack.py +0 -411
  1093. teradataml/analytics/mle/PageRank.py +0 -535
  1094. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1095. teradataml/analytics/mle/PathGenerator.py +0 -367
  1096. teradataml/analytics/mle/PathStart.py +0 -464
  1097. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1098. teradataml/analytics/mle/Pivot.py +0 -471
  1099. teradataml/analytics/mle/ROC.py +0 -425
  1100. teradataml/analytics/mle/RandomSample.py +0 -637
  1101. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1102. teradataml/analytics/mle/SAX.py +0 -779
  1103. teradataml/analytics/mle/SVMDense.py +0 -677
  1104. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1105. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1106. teradataml/analytics/mle/SVMSparse.py +0 -557
  1107. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1108. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1109. teradataml/analytics/mle/Sampling.py +0 -549
  1110. teradataml/analytics/mle/Scale.py +0 -565
  1111. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1112. teradataml/analytics/mle/ScaleMap.py +0 -378
  1113. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1114. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1115. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1116. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1117. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1118. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1119. teradataml/analytics/mle/Sessionize.py +0 -475
  1120. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1121. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1122. teradataml/analytics/mle/TF.py +0 -389
  1123. teradataml/analytics/mle/TFIDF.py +0 -504
  1124. teradataml/analytics/mle/TextChunker.py +0 -414
  1125. teradataml/analytics/mle/TextClassifier.py +0 -399
  1126. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1127. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1128. teradataml/analytics/mle/TextMorph.py +0 -494
  1129. teradataml/analytics/mle/TextParser.py +0 -623
  1130. teradataml/analytics/mle/TextTagger.py +0 -530
  1131. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1132. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1133. teradataml/analytics/mle/Unpack.py +0 -526
  1134. teradataml/analytics/mle/Unpivot.py +0 -438
  1135. teradataml/analytics/mle/VarMax.py +0 -776
  1136. teradataml/analytics/mle/VectorDistance.py +0 -762
  1137. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1138. teradataml/analytics/mle/XGBoost.py +0 -842
  1139. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1140. teradataml/analytics/mle/__init__.py +0 -123
  1141. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1142. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1143. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1144. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1145. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1146. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1147. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1148. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1149. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1150. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1151. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1152. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1153. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1154. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1155. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1156. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1157. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1158. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1159. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1160. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1161. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1162. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1163. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1164. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1165. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1166. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1167. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1168. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1169. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1170. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1171. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1172. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1173. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1174. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1175. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1176. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1177. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1178. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1179. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1180. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1181. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1182. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1183. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1184. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1185. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1186. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1187. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1188. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1189. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1190. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1191. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1192. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1193. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1194. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1195. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1196. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1197. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1198. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1199. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1200. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1201. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1202. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1203. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1204. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1205. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1206. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1207. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1208. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1209. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1210. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1211. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1212. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1213. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1214. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1215. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1216. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1217. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1218. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1219. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1220. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1221. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1222. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1223. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1224. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1225. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1226. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1227. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1228. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1229. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1230. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1231. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1232. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1233. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1234. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1235. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1236. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1237. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1238. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1239. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1240. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1241. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1242. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1243. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1244. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1245. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1246. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1247. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1248. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1249. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1250. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1251. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1252. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1253. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1254. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1255. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1256. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1257. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1258. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1259. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1260. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1261. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1262. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1263. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1264. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1265. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1266. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1267. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1268. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1269. teradataml/analytics/sqle/Antiselect.py +0 -321
  1270. teradataml/analytics/sqle/Attribution.py +0 -603
  1271. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1272. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1273. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1274. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1275. teradataml/analytics/sqle/NPath.py +0 -632
  1276. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1277. teradataml/analytics/sqle/Pack.py +0 -388
  1278. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1279. teradataml/analytics/sqle/Sessionize.py +0 -390
  1280. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1281. teradataml/analytics/sqle/Unpack.py +0 -503
  1282. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1283. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1284. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1285. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1286. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1287. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1288. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1289. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1290. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1291. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1292. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1293. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1294. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1295. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1296. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1297. teradataml/catalog/model_cataloging.py +0 -980
  1298. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1299. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1300. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1301. teradataml/table_operators/sandbox_container_util.py +0 -643
  1302. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1303. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
@@ -1,1698 +1,1769 @@
1
- #!/usr/bin/python
2
- # ##################################################################
3
- #
4
- # Copyright 2018 Teradata. All rights reserved.
5
- # TERADATA CONFIDENTIAL AND TRADE SECRET
6
- #
7
- # ##################################################################
8
-
9
- import re
10
- import datetime
11
- import warnings
12
- import pandas as pd
13
- import pandas.api.types as pt
14
-
15
- from sqlalchemy import MetaData, Table, Column
16
- from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
17
- from teradatasqlalchemy import (INTEGER, BIGINT, BYTEINT, FLOAT)
18
- from teradatasqlalchemy import (TIMESTAMP)
19
- from teradatasqlalchemy import (VARCHAR)
20
- from teradatasqlalchemy.dialect import TDCreateTablePost as post
21
- from teradataml.common.aed_utils import AedUtils
22
- from teradataml.context.context import *
23
- from teradataml.dataframe import dataframe as tdmldf
24
- from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
25
- from teradataml.dbutils.dbutils import _rename_table
26
- from teradataml.common.utils import UtilFuncs
27
- from teradataml.options.configure import configure
28
- from teradataml.common.constants import CopyToConstants, PTITableConstants
29
- from teradatasql import OperationalError
30
- from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
31
- from teradataml.utils.utils import execute_sql
32
- from teradataml.utils.validators import _Validators
33
-
34
-
35
-
36
- def copy_to_sql(df, table_name,
37
- schema_name=None, if_exists='append',
38
- index=False, index_label=None,
39
- primary_index=None,
40
- temporary=False, types = None,
41
- primary_time_index_name = None,
42
- timecode_column=None,
43
- timebucket_duration = None,
44
- timezero_date = None,
45
- columns_list=None,
46
- sequence_column=None,
47
- seq_max=None,
48
- set_table=False,
49
- chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
50
- match_column_order=True):
51
- """
52
- Writes records stored in a Pandas DataFrame or a teradataml DataFrame to Teradata Vantage.
53
-
54
- PARAMETERS:
55
-
56
- df:
57
- Required Argument.
58
- Specifies the Pandas or teradataml DataFrame object to be saved.
59
- Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
60
-
61
- table_name:
62
- Required Argument.
63
- Specifies the name of the table to be created in Vantage.
64
- Types : String
65
-
66
- schema_name:
67
- Optional Argument.
68
- Specifies the name of the SQL schema in Teradata Vantage to write to.
69
- Types: String
70
- Default: None (Uses default database schema).
71
-
72
- Note: schema_name will be ignored when temporary=True.
73
-
74
- if_exists:
75
- Optional Argument.
76
- Specifies the action to take when table already exists in Vantage.
77
- Types: String
78
- Possible values: {'fail', 'replace', 'append'}
79
- - fail: If table exists, do nothing.
80
- - replace: If table exists, drop it, recreate it, and insert data.
81
- - append: If table exists, insert data. Create if does not exist.
82
- Default : append
83
-
84
- Note: Replacing a table with the contents of a teradataml DataFrame based on
85
- the same underlying table is not supported.
86
-
87
- index:
88
- Optional Argument.
89
- Specifies whether to save Pandas DataFrame index as a column or not.
90
- Types : Boolean (True or False)
91
- Default : False
92
-
93
- Note: Only use as True when attempting to save Pandas DataFrames (and not with teradataml DataFrames).
94
-
95
- index_label:
96
- Optional Argument.
97
- Specifies the column label(s) for Pandas DataFrame index column(s).
98
- Types : String or list of strings
99
- Default : None
100
-
101
- Note: If index_label is not specified (defaulted to None or is empty) and `index` is True, then
102
- the 'names' property of the DataFrames index is used as the label(s),
103
- and if that too is None or empty, then:
104
- 1) a default label 'index_label' or 'level_0' (when 'index_label' is already taken) is used
105
- when index is standard.
106
- 2) default labels 'level_0', 'level_1', etc. are used when index is multi-level index.
107
-
108
- Only use as True when attempting to save Pandas DataFrames (and not on teradataml DataFrames).
109
-
110
- primary_index:
111
- Optional Argument.
112
- Specifies which column(s) to use as primary index while creating Teradata table(s) in Vantage.
113
- When None, No Primary Index Teradata tables are created.
114
- Types : String or list of strings
115
- Default : None
116
- Example:
117
- primary_index = 'my_primary_index'
118
- primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
119
-
120
- temporary:
121
- Optional Argument.
122
- Specifies whether to create Vantage tables as permanent or volatile.
123
- Types : Boolean (True or False)
124
- Default : False
125
-
126
- Note: When True:
127
- 1. volatile Tables are created, and
128
- 2. schema_name is ignored.
129
- When False, permanent tables are created.
130
-
131
- types:
132
- Optional Argument.
133
- Specifies required data-types for requested columns to be saved in Vantage.
134
- Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
135
- Default: None
136
-
137
- Note:
138
- 1. This argument accepts a dictionary of columns names and their required teradatasqlalchemy types
139
- as key-value pairs, allowing to specify a subset of the columns of a specific type.
140
- i) When the input is a Pandas DataFrame:
141
- - When only a subset of all columns are provided, the column types for the rest are assigned
142
- appropriately.
143
- - When types argument is not provided, the column types are assigned
144
- as listed in the following table:
145
- +---------------------------+-----------------------------------------+
146
- | Pandas/Numpy Type | teradatasqlalchemy Type |
147
- +---------------------------+-----------------------------------------+
148
- | int32 | INTEGER |
149
- +---------------------------+-----------------------------------------+
150
- | int64 | BIGINT |
151
- +---------------------------+-----------------------------------------+
152
- | bool | BYTEINT |
153
- +---------------------------+-----------------------------------------+
154
- | float32/float64 | FLOAT |
155
- +---------------------------+-----------------------------------------+
156
- | datetime64/datetime64[ns] | TIMESTAMP |
157
- +---------------------------+-----------------------------------------+
158
- | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True) |
159
- +---------------------------+-----------------------------------------+
160
- | Any other data type | VARCHAR(configure.default_varchar_size) |
161
- +---------------------------+-----------------------------------------+
162
- ii) When the input is a teradataml DataFrame:
163
- - When only a subset of all columns are provided, the column types for the rest are retained.
164
- - When types argument is not provided, the column types are retained.
165
- 2. This argument does not have any effect when the table specified using table_name and schema_name
166
- exists and if_exists = 'append'.
167
-
168
- primary_time_index_name:
169
- Optional Argument.
170
- Specifies a name for the Primary Time Index (PTI) when the table
171
- to be created must be a PTI table.
172
- Type: String
173
-
174
- Note: This argument is not required or used when the table to be created
175
- is not a PTI table. It will be ignored if specified without the timecode_column.
176
-
177
- timecode_column:
178
- Optional argument.
179
- Required when the DataFrame must be saved as a PTI table.
180
- Specifies the column in the DataFrame that reflects the form
181
- of the timestamp data in the time series.
182
- This column will be the TD_TIMECODE column in the table created.
183
- It should be of SQL type TIMESTAMP(n), TIMESTAMP(n) WITH TIMEZONE, or DATE,
184
- corresponding to Python types datetime.datetime or datetime.date, or Pandas dtype datetime64[ns].
185
- Type: String
186
-
187
- Note: When you specify this parameter, an attempt to create a PTI table
188
- will be made. This argument is not required when the table to be created
189
- is not a PTI table. If this argument is specified, primary_index will be ignored.
190
-
191
- timezero_date:
192
- Optional Argument.
193
- Used when the DataFrame must be saved as a PTI table.
194
- Specifies the earliest time series data that the PTI table will accept;
195
- a date that precedes the earliest date in the time series data.
196
- Value specified must be of the following format: DATE 'YYYY-MM-DD'
197
- Default Value: DATE '1970-01-01'.
198
- Type: String
199
-
200
- Note: This argument is not required or used when the table to be created
201
- is not a PTI table. It will be ignored if specified without the timecode_column.
202
-
203
- timebucket_duration:
204
- Optional Argument.
205
- Required if columns_list is not specified or is None.
206
- Used when the DataFrame must be saved as a PTI table.
207
- Specifies a duration that serves to break up the time continuum in
208
- the time series data into discrete groups or buckets.
209
- Specified using the formal form time_unit(n), where n is a positive
210
- integer, and time_unit can be any of the following:
211
- CAL_YEARS, CAL_MONTHS, CAL_DAYS, WEEKS, DAYS, HOURS, MINUTES,
212
- SECONDS, MILLISECONDS, or MICROSECONDS.
213
- Type: String
214
-
215
- Note: This argument is not required or used when the table to be created
216
- is not a PTI table. It will be ignored if specified without the timecode_column.
217
-
218
- columns_list:
219
- Optional Argument.
220
- Used when the DataFrame must be saved as a PTI table.
221
- Required if timebucket_duration is not specified.
222
- A list of one or more PTI table column names.
223
- Type: String or list of Strings
224
-
225
- Note: This argument is not required or used when the table to be created
226
- is not a PTI table. It will be ignored if specified without the timecode_column.
227
-
228
- sequence_column:
229
- Optional Argument.
230
- Used when the DataFrame must be saved as a PTI table.
231
- Specifies the column of type Integer containing the unique identifier for
232
- time series data readings when they are not unique in time.
233
- * When specified, implies SEQUENCED, meaning more than one reading from the same
234
- sensor may have the same timestamp.
235
- This column will be the TD_SEQNO column in the table created.
236
- * When not specified, implies NONSEQUENCED, meaning there is only one sensor reading
237
- per timestamp.
238
- This is the default.
239
- Type: str
240
-
241
- Note: This argument is not required or used when the table to be created
242
- is not a PTI table. It will be ignored if specified without the timecode_column.
243
-
244
- seq_max:
245
- Optional Argument.
246
- Used when the DataFrame must be saved as a PTI table.
247
- Specifies the maximum number of sensor data rows that can have the
248
- same timestamp. Can be used when "sequence_column" is specified.
249
- Accepted range: 1 - 2147483647.
250
- Default Value: 20000.
251
- Type: int
252
-
253
- Note: This argument is not required or used when the table to be created
254
- is not a PTI table. It will be ignored if specified without the timecode_column.
255
-
256
- set_table:
257
- Optional Argument.
258
- Specifies a flag to determine whether to create a SET or a MULTISET table.
259
- When True, a SET table is created.
260
- When False, a MULTISET table is created.
261
- Default Value: False
262
- Type: boolean
263
-
264
- Note: 1. Specifying set_table=True also requires specifying primary_index or timecode_column.
265
- 2. Creating SET table (set_table=True) may result in
266
- a. an error if the source is a Pandas DataFrame having duplicate rows.
267
- b. loss of duplicate rows if the source is a teradataml DataFrame.
268
- 3. This argument has no effect if the table already exists and if_exists='append'.
269
-
270
- chunksize:
271
- Optional Argument.
272
- Specifies the number of rows to be loaded in a batch.
273
- Note:
274
- This argument is used only when argument "df" is a pandas DataFrame.
275
- Default Value: 16383
276
- Types: int
277
-
278
- match_column_order:
279
- Optional Argument.
280
- Specifies whether the order of the columns in existing table matches the order of
281
- the columns in the "df" or not. When set to False, the dataframe to be loaded can
282
- have any order and number of columns.
283
- Default Value: True
284
- Types: bool
285
-
286
- RETURNS:
287
- None
288
-
289
- RAISES:
290
- TeradataMlException
291
-
292
- EXAMPLES:
293
- 1. Saving a Pandas DataFrame:
294
-
295
- >>> from teradataml.dataframe.copy_to import copy_to_sql
296
- >>> from teradatasqlalchemy.types import *
297
-
298
- >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
299
- 'emp_sage': [100, 200, 300, 400],
300
- 'emp_id': [133, 144, 155, 177],
301
- 'marks': [99.99, 97.32, 94.67, 91.00]
302
- }
303
-
304
- >>> pandas_df = pd.DataFrame(df)
305
-
306
- a) Save a Pandas DataFrame using a dataframe & table name only:
307
- >>> copy_to_sql(df = pandas_df, table_name = 'my_table')
308
-
309
- b) Saving as a SET table
310
- >>> copy_to_sql(df = pandas_df, table_name = 'my_set_table', index=True,
311
- primary_index='index_label', set_table=True)
312
-
313
- c) Save a Pandas DataFrame by specifying additional parameters:
314
- >>> copy_to_sql(df = pandas_df, table_name = 'my_table_2', schema_name = 'alice',
315
- index = True, index_label = 'my_index_label', temporary = False,
316
- primary_index = ['emp_id'], if_exists = 'append',
317
- types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
318
- 'emp_id': BIGINT, 'marks': DECIMAL})
319
-
320
- d) Saving with additional parameters as a SET table
321
- >>> copy_to_sql(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
322
- index = True, index_label = 'my_index_label', temporary = False,
323
- primary_index = ['emp_id'], if_exists = 'append',
324
- types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
325
- 'emp_id': BIGINT, 'marks': DECIMAL},
326
- set_table=True)
327
-
328
- e) Saving levels in index of type MultiIndex
329
- >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
330
- >>> copy_to_sql(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
331
- index = True, index_label = ['index1', 'index2'], temporary = False,
332
- primary_index = ['index1'], if_exists = 'replace')
333
-
334
- 2. Saving a teradataml DataFrame:
335
-
336
- >>> from teradataml.dataframe.dataframe import DataFrame
337
- >>> from teradataml.dataframe.copy_to import copy_to_sql
338
- >>> from teradatasqlalchemy.types import *
339
- >>> from teradataml.data.load_example_data import load_example_data
340
-
341
- >>> # Load the data to run the example.
342
- >>> load_example_data("glm", "admissions_train")
343
-
344
- >>> # Create teradataml DataFrame(s)
345
- >>> df = DataFrame('admissions_train')
346
- >>> df2 = df.select(['gpa', 'masters'])
347
-
348
- a) Save a teradataml DataFrame by using only a table name:
349
- >>> df2.to_sql('my_tdml_table')
350
-
351
- b) Save a teradataml DataFrame by using additional parameters:
352
- >>> df2.to_sql(table_name = 'my_tdml_table', if_exists='append',
353
- primary_index = ['gpa'], temporary=False, schema_name='alice')
354
-
355
- c) Alternatively, save a teradataml DataFrame by using copy_to_sql:
356
- >>> copy_to_sql(df2, 'my_tdml_table_2')
357
-
358
- d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
359
- >>> copy_to_sql(df = df2, table_name = 'my_tdml_table_3', schema_name = 'alice',
360
- temporary = False, primary_index = None, if_exists = 'append',
361
- types = {'masters': VARCHAR, 'gpa':INTEGER})
362
-
363
- e) Saving as a SET table
364
- >>> copy_to_sql(df = df2, table_name = 'my_tdml_set_table', schema_name = 'alice',
365
- temporary = False, primary_index = ['gpa'], if_exists = 'append',
366
- types = {'masters': VARCHAR, 'gpa':INTEGER}, set_table = True)
367
-
368
- 3. Saving a teradataml DataFrame as a PTI table:
369
-
370
- >>> from teradataml.dataframe.dataframe import DataFrame
371
- >>> from teradataml.dataframe.copy_to import copy_to_sql
372
- >>> from teradataml.data.load_example_data import load_example_data
373
-
374
- >>> load_example_data("sessionize", "sessionize_table")
375
- >>> df3 = DataFrame('sessionize_table')
376
-
377
- a) Using copy_to_sql
378
- >>> copy_to_sql(df3, "test_copyto_pti",
379
- timecode_column='clicktime',
380
- columns_list='event')
381
-
382
- b) Alternatively, using DataFrame.to_sql
383
- >>> df3.to_sql(table_name = "test_copyto_pti_1",
384
- timecode_column='clicktime',
385
- columns_list='event')
386
-
387
- c) Saving as a SET table
388
- >>> copy_to_sql(df3, "test_copyto_pti_2",
389
- timecode_column='clicktime',
390
- columns_list='event',
391
- set_table=True)
392
-
393
- """
394
- # Deriving global connection using get_connection().
395
- con = get_connection()
396
-
397
- try:
398
- if con is None:
399
- raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE), MessageCodes.CONNECTION_FAILURE)
400
-
401
- # Check if the table to be created must be a Primary Time Index (PTI) table.
402
- # If a user specifies the timecode_column parameter, an attempt to create
403
- # a PTI will be made.
404
- is_pti = False
405
- if timecode_column is not None:
406
- is_pti = True
407
- if primary_index is not None:
408
- warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
409
- 'primary_index',
410
- 'timecode_column',
411
- 'specified'))
412
- else:
413
- ignored = []
414
- if timezero_date is not None: ignored.append('timezero_date')
415
- if timebucket_duration is not None: ignored.append('timebucket_duration')
416
- if sequence_column is not None: ignored.append('sequence_column')
417
- if seq_max is not None: ignored.append('seq_max')
418
- if columns_list is not None and (
419
- not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
420
- if primary_time_index_name is not None: ignored.append('primary_time_index_name')
421
- if len(ignored) > 0:
422
- warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
423
- ignored,
424
- 'timecode_column',
425
- 'missing'))
426
-
427
- # Unset schema_name when temporary is True since volatile tables are always in the user database
428
- if temporary is True:
429
- if schema_name is not None:
430
- warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
431
- 'schema_name',
432
- 'temporary=True',
433
- 'specified'))
434
- schema_name = None
435
-
436
- # Validate DataFrame & related flags; Proceed only when True
437
- from teradataml.dataframe.data_transfer import _DataTransferUtils
438
- dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name,
439
- if_exists=if_exists, index=index, index_label=index_label,
440
- primary_index=primary_index, temporary=temporary,
441
- types=types, primary_time_index_name=primary_time_index_name,
442
- timecode_column=timecode_column,
443
- timebucket_duration=timebucket_duration,
444
- timezero_date=timezero_date, columns_list=columns_list,
445
- sequence_column=sequence_column, seq_max=seq_max,
446
- set_table=set_table, api_name='copy_to',
447
- chunksize=chunksize, match_column_order=match_column_order)
448
-
449
- dt_obj._validate()
450
-
451
- # If the table to be created must be a PTI table, then validate additional parameters
452
- # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
453
- # will be ignored - for example, primary_index
454
- if is_pti:
455
- _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
456
- timezero_date, primary_time_index_name, columns_list,
457
- sequence_column, seq_max, types, index, index_label)
458
-
459
- # A table cannot be a SET table and have NO PRIMARY INDEX
460
- if set_table and primary_index is None and timecode_column is None:
461
- raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_NO_PI),
462
- MessageCodes.SET_TABLE_NO_PI)
463
-
464
- # Check if destination table exists
465
- table_exists = dt_obj._table_exists(con)
466
-
467
- # Raise an exception when the table exists and if_exists = 'fail'
468
- dt_obj._check_table_exists(is_table_exists=table_exists)
469
-
470
- # Is the input DataFrame a Pandas DataFrame?
471
- is_pandas_df = isinstance(df, pd.DataFrame)
472
-
473
- # Let's also execute the node and set the table_name when df is teradataml DataFrame
474
- if not is_pandas_df and df._table_name is None:
475
- df._table_name = df_utils._execute_node_return_db_object_name(df._nodeid, df._metaexpr)
476
-
477
- # Check table name conflict is present.
478
- is_conflict = _check_table_name_conflict(df, table_name) if isinstance(df, tdmldf.DataFrame) and \
479
- if_exists.lower() == 'replace' else False
480
-
481
- # Create a temporary table name when a table name conflict is present.
482
- if is_conflict:
483
- # Store actual destination table name for later use.
484
- dest_table_name = table_name
485
- table_name = UtilFuncs._generate_temp_table_name(prefix=table_name,
486
- table_type=TeradataConstants.TERADATA_TABLE,
487
- quote=False)
488
-
489
- # Let's create the SQLAlchemy table object to recreate the table
490
- if not table_exists or if_exists.lower() == 'replace':
491
- if not is_pti:
492
- table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
493
- types, None if not is_pandas_df else index,
494
- None if not is_pandas_df else index_label)
495
- else:
496
- table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
497
- primary_time_index_name, timecode_column, timezero_date,
498
- timebucket_duration, sequence_column, seq_max,
499
- columns_list, set_table, types,
500
- None if not is_pandas_df else index,
501
- None if not is_pandas_df else index_label)
502
-
503
- if table is not None:
504
- # If the table needs to be replaced and there is no table name conflict,
505
- # let's drop the existing table first
506
- if table_exists and not is_conflict:
507
- tbl_name = dt_obj._get_fully_qualified_table_name()
508
- UtilFuncs._drop_table(tbl_name)
509
- try:
510
- table.create(bind=get_context())
511
- except sqlachemyOperationalError as err:
512
- raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
513
- '\n' + str(err),
514
- MessageCodes.TABLE_OBJECT_CREATION_FAILED)
515
- else:
516
- raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
517
- MessageCodes.TABLE_OBJECT_CREATION_FAILED)
518
-
519
- # Check column compatibility for insertion when table exists and if_exists = 'append'
520
- if table_exists and if_exists.lower() == 'append':
521
- UtilFuncs._get_warnings('set_table', set_table, 'if_exists', 'append')
522
-
523
- table = UtilFuncs._get_sqlalchemy_table(table_name,
524
- schema_name=schema_name)
525
-
526
- if table is not None:
527
- # ELE-2284
528
- # We are not considering types for 'append' mode as it is a simple insert and no casting is applied
529
- if is_pandas_df:
530
- cols = _extract_column_info(df, index=index, index_label=index_label)
531
- else:
532
- cols, _ = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
533
- if match_column_order:
534
- cols_compatible = _check_columns_insertion_compatible(table.c, cols, is_pandas_df,
535
- is_pti, timecode_column, sequence_column)
536
-
537
- if not cols_compatible:
538
- raise TeradataMlException(Messages.get_message(MessageCodes.INSERTION_INCOMPATIBLE),
539
- MessageCodes.INSERTION_INCOMPATIBLE)
540
-
541
- # df is a Pandas DataFrame object
542
- if isinstance(df, pd.DataFrame):
543
- if not table_exists or if_exists.lower() == 'replace':
544
- try:
545
- # Saving the Pandas index and volatile tables is supported by manually inserting rows (in batches) for now
546
- if index or is_pti:
547
- _insert_from_dataframe(df, con, schema_name, table_name, index,
548
- chunksize, is_pti, timecode_column,
549
- sequence_column, match_column_order)
550
-
551
- # When the index isn't saved, and for non-PTI tables, batch to_sql insertion is used
552
- else:
553
- df.to_sql(table_name, get_context(), if_exists='append', index=False, index_label=None,
554
- chunksize=chunksize, schema=schema_name)
555
-
556
- except sqlachemyOperationalError as err:
557
- if "Duplicate row error" in str(err):
558
- raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_DUPICATE_ROW,
559
- table_name),
560
- MessageCodes.SET_TABLE_DUPICATE_ROW)
561
- else:
562
- raise
563
-
564
- elif table_exists and if_exists.lower() == 'append':
565
- _insert_from_dataframe(df, con, schema_name, table_name, index,
566
- chunksize, is_pti, timecode_column,
567
- sequence_column, match_column_order)
568
-
569
-
570
- # df is a teradataml DataFrame object (to_sql wrapper used)
571
- elif isinstance(df, tdmldf.DataFrame):
572
- df_column_list = [col.name for col in df._metaexpr.c]
573
-
574
- if is_pti:
575
- # Reorder the column list to reposition the timecode and sequence columns
576
- df_column_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
577
-
578
- df_utils._insert_all_from_table(table_name, df._table_name, df_column_list, schema_name, temporary)
579
-
580
- # When a table name conflict is present, delete the source table after creating the temporary table.
581
- # Then rename the temporary table to the destination table name.
582
- if is_conflict and if_exists.lower() == 'replace':
583
- tbl_name = dt_obj._get_fully_qualified_table_name()
584
- UtilFuncs._drop_table(tbl_name)
585
- _rename_table(table_name, dest_table_name)
586
-
587
-
588
- except (TeradataMlException, ValueError, TypeError):
589
- raise
590
- except Exception as err:
591
- raise TeradataMlException(Messages.get_message(MessageCodes.COPY_TO_SQL_FAIL) + str(err),
592
- MessageCodes.COPY_TO_SQL_FAIL) from err
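As the error handling above shows, loading a Pandas DataFrame with duplicate rows into a SET table surfaces the underlying "Duplicate row error" as a TeradataMlException carrying the SET_TABLE_DUPICATE_ROW message code. A minimal sketch of guarding against it; the table name is illustrative:

    >>> from teradataml.common.exceptions import TeradataMlException
    >>> try:
    ...     # SET tables reject duplicate rows coming from a Pandas source.
    ...     copy_to_sql(df = pandas_df, table_name = 'my_set_table',
    ...                 primary_index = 'emp_id', set_table = True)
    ... except TeradataMlException as err:
    ...     print(err)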
593
-
594
- def _check_table_name_conflict(df, table_name):
595
- """
596
- Check whether the destination "table_name" matches any of the teradataml DataFrame's parent nodes.
597
- This function traverses the DAG from child node to root node and checks for a table name conflict.
598
-
599
- PARAMETERS:
600
- df:
601
- Required Argument.
602
- Specifies the teradataml DataFrame object to be checked.
603
- Types: teradataml.dataframe.dataframe.DataFrame
604
-
605
- table_name:
606
- Required Argument.
607
- Specifies the name of the table to be created in Vantage.
608
- Types : String
609
-
610
- RETURNS:
611
- A boolean value representing the presence of conflict.
612
-
613
- RAISES:
614
- None
615
-
616
- EXAMPLES:
617
- >>> df = DataFrame("sales")
618
- >>> table_name = "destination_table"
619
- >>> _check_table_name_conflict(df, table_name)
620
- """
621
- aed_obj = AedUtils()
622
- # Check if the parent node count is greater than 0.
623
- if aed_obj._aed_get_parent_node_count(df._nodeid) > 0:
624
- # Let's check "table_name" matches with any of the parent nodes table name.
625
- # Get current table node id.
626
- node_id = df._nodeid
627
- while node_id:
628
-
629
- # Get the parent node id using current table node id.
630
- parent_node_id = aed_obj._aed_get_parent_nodeids(node_id)
631
-
632
- if parent_node_id:
633
- # Check "table_name" matches with the parent "table_name".
634
- # If table name matches, then return 'True'.
635
- # Otherwise, traverse the graph from the current node up to the topmost root node.
636
- if table_name in aed_obj._aed_get_source_tablename(parent_node_id[0]):
637
- return True
638
- else:
639
- node_id = parent_node_id[0]
640
- else:
641
- # When parent_node_id is empty return 'False'.
642
- return False
643
- return False
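A sketch of the scenario this check guards against, reusing the admissions_train DataFrame from the copy_to_sql examples above. When a DataFrame derived from a table would replace that same table, the conflict is detected here so that copy_to_sql can stage the data in a temporary table, drop the original, and rename the staged table (as implemented above):

    >>> df = DataFrame('admissions_train')
    >>> df2 = df.select(['gpa', 'masters'])
    >>> # df2's lineage includes 'admissions_train', so replacing that
    >>> # table with df2's own contents is a conflict.
    >>> _check_table_name_conflict(df2, 'admissions_train')
    True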
644
-
645
-
646
- def _get_sqlalchemy_table_from_tdmldf(df, meta):
647
- """
648
- This is an internal function used to generate an SQLAlchemy Table
649
- object for the underlying table/view of a DataFrame.
650
-
651
- PARAMETERS:
652
- df:
653
- The teradataml DataFrame to generate the SQLAlchemy.Table object for.
654
-
655
- meta:
656
- The SQLAlchemy.Metadata object.
657
-
658
- RETURNS:
659
- SQLAlchemy.Table
660
-
661
- RAISES:
662
- None
663
-
664
- EXAMPLES:
665
- >>> con = get_connection()
666
- >>> df = DataFrame('admissions_train')
667
- >>> meta = sqlalchemy.MetaData()
668
- >>> table = _get_sqlalchemy_table_from_tdmldf(df, meta)
669
-
670
- """
671
- con = get_connection()
672
- db_schema = UtilFuncs._extract_db_name(df._table_name)
673
- db_table_name = UtilFuncs._extract_table_name(df._table_name)
674
-
675
- return Table(db_table_name, meta, schema=db_schema, autoload_with=get_context())
676
-
677
-
678
- def _get_index_labels(df, index_label):
679
- """
680
- Internal function to construct a list of labels for the indices to be saved from the Pandas DataFrame
681
- based on user input and information from the DataFrame.
682
-
683
- PARAMETERS:
684
- df:
685
- The Pandas input DataFrame.
686
-
687
- index_label:
688
- The user provided label(s) for the indices.
689
-
690
- RAISES:
691
- None
692
-
693
- RETURNS:
694
- A list of Strings corresponding to the labels for the indices to add as columns.
695
-
696
- EXAMPLES:
697
- _get_index_labels(df, index_label)
698
- """
699
- default_index_label = 'index_label'
700
- default_level_prefix = 'level_'
701
- level_cnt = 0
702
-
703
- is_multi_index = isinstance(df.index, pd.MultiIndex)
704
- ind_types = [level.dtype for level in df.index.levels] if is_multi_index else [df.index.dtype]
705
-
706
- ind_names = []
707
- if index_label:
708
- ind_names = [index_label] if isinstance(index_label, str) else index_label
709
- else:
710
- for name in df.index.names:
711
- if name not in ('', None):
712
- ind_names.append(name)
713
- else:
714
- if is_multi_index:
715
- ind_names.append(default_level_prefix + str(level_cnt))
716
- level_cnt = level_cnt + 1
717
- else:
718
- df_columns = _get_pd_df_column_names(df)
719
- label = default_level_prefix + str(level_cnt) if default_index_label in df_columns else default_index_label
720
- ind_names.append(label)
721
-
722
- return ind_names, ind_types
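A quick illustration of the defaulting rules implemented above; this is plain pandas and needs no Vantage connection, and exact dtype reprs may vary by pandas version:

    >>> import pandas as pd
    >>> pdf = pd.DataFrame({'a': [1, 2]})          # unnamed standard index
    >>> _get_index_labels(pdf, None)
    (['index_label'], [dtype('int64')])
    >>> # Unnamed MultiIndex levels fall back to 'level_0', 'level_1', ...
    >>> mdf = pdf.set_index([pd.Index([1, 2]), pd.Index([3, 4])])
    >>> _get_index_labels(mdf, None)
    (['level_0', 'level_1'], [dtype('int64'), dtype('int64')])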
723
-
724
- def _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
725
- timezero_date, primary_time_index_name, columns_list,
726
- sequence_column, seq_max, types, index, index_label):
727
- """
728
- This is an internal function used to validate the PTI part of the copy request.
729
- DataFrame, connection, and related parameters are checked.
730
- Saving to Vantage proceeds only when validation returns True.
731
-
732
- PARAMETERS:
733
- df:
734
- The DataFrame (Pandas or teradataml) object to be saved.
735
-
736
- timecode_column:
737
- The column in the DataFrame that reflects the form of the timestamp
738
- data in the time series.
739
- Type: String
740
-
741
- timebucket_duration:
742
- A duration that serves to break up the time continuum in
743
- the time series data into discrete groups or buckets.
744
- Type: String
745
-
746
- timezero_date:
747
- Specifies the earliest time series data that the PTI table will accept.
748
- Type: String
749
-
750
- primary_time_index_name:
751
- A name for the Primary Time Index (PTI).
752
- Type: String
753
-
754
- columns_list:
755
- A list of one or more PTI table column names.
756
- Type: String or list of Strings
757
-
758
- sequence_column:
759
- Specifies a column of type Integer with sequences implying that the
760
- time series data readings are not unique.
761
- If not specified, the time series data are assumed to be unique in time.
762
- Type: String
763
-
764
- seq_max:
765
- Specifies the maximum number of sensor data rows that can have the
766
- same timestamp. Can be used when "sequence_column" is specified.
767
- Accepted range: 1 - 2147483647.
768
- Type: int
769
-
770
- types:
771
- Dictionary specifying column-name to teradatasqlalchemy type-mapping.
772
-
773
- index:
774
- Flag specifying whether to write Pandas DataFrame index as a column or not.
775
- Type: bool
776
-
777
- index_label:
778
- Column label for index column(s).
779
- Type: String
780
-
781
- RETURNS:
782
- True, when all parameters are valid.
783
-
784
- RAISES:
785
- TeradataMlException, when parameter validation fails.
786
-
787
- EXAMPLES:
788
- _validate_pti_copy_parameters(df = my_df, timecode_column = 'ts', timebucket_duration = 'HOURS(2)')
789
- """
790
- if isinstance(df, pd.DataFrame):
791
- df_columns = _get_pd_df_column_names(df)
792
- else:
793
- df_columns = [col.name for col in df._metaexpr.c]
794
-
795
- awu = AnalyticsWrapperUtils()
796
- awu_matrix = []
797
-
798
- # The arguments added to awu_matrix are:
799
- # arg_name, arg, is_optional, acceptable types
800
- # The value for is_optional is set to False when the argument
801
- # a) is a required argument
802
- # b) is not allowed to be None, even if it is optional
803
- awu_matrix.append(['timecode_column', timecode_column, False, (str)])
804
- awu_matrix.append(['columns_list', columns_list, True, (str, list)])
805
- awu_matrix.append(['timezero_date', timezero_date, True, (str)])
806
- awu_matrix.append(['timebucket_duration', timebucket_duration, True, (str)])
807
- awu_matrix.append(['primary_time_index_name', primary_time_index_name, True, (str)])
808
- awu_matrix.append(['sequence_column', sequence_column, True, (str)])
809
- awu_matrix.append(['seq_max', seq_max, True, (int)])
810
-
811
- # Validate types
812
- awu._validate_argument_types(awu_matrix)
813
-
814
- # Validate arguments are not empty
815
- awu._validate_input_columns_not_empty(timecode_column, 'timecode_column')
816
- awu._validate_input_columns_not_empty(columns_list, 'columns_list')
817
- awu._validate_input_columns_not_empty(timezero_date, 'timezero_date')
818
- awu._validate_input_columns_not_empty(timebucket_duration, 'timebucket_duration')
819
- awu._validate_input_columns_not_empty(sequence_column, 'sequence_column')
820
-
821
- # Validate all the required arguments and optional arguments when not none
822
- # First the timecode_column
823
- _validate_column_in_list_of_columns('df', df_columns, timecode_column, 'timecode_column')
824
- # Check the type of timecode_column
825
- _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES.value,
826
- types, index, index_label)
827
-
828
- # timezero date
829
- _validate_timezero_date(timezero_date)
830
-
831
- # timebucket duration
832
- _Validators._validate_timebucket_duration(timebucket_duration)
833
-
834
- # Validate sequence_column
835
- if sequence_column is not None:
836
- _validate_column_in_list_of_columns('df', df_columns, sequence_column, 'sequence_column')
837
- # Check the type of sequence_column
838
- _validate_column_type(df, sequence_column, 'sequence_column',
839
- PTITableConstants.VALID_SEQUENCE_COL_DATATYPES.value, types, index, index_label)
840
-
841
- # Validate seq_max
842
- if seq_max is not None and (seq_max < 1 or seq_max > 2147483647):
843
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(seq_max, 'seq_max', '1 < integer < 2147483647'),
844
- MessageCodes.INVALID_ARG_VALUE)
845
-
846
- # Validate cols_list
847
- _validate_columns_list('df', df_columns, columns_list)
848
- if isinstance(columns_list, str):
849
- columns_list = [columns_list]
850
-
851
- # Either one or both of timebucket_duration and columns_list must be specified
852
- if timebucket_duration is None and (columns_list is None or len(columns_list) == 0):
853
- raise TeradataMlException(
854
- Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, 'timebucket_duration', 'columns_list'),
855
- MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
856
-
857
-
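The two checks at the end of this validator are self-contained; a minimal standalone sketch of the same rules (a plain ValueError stands in for TeradataMlException, and the helper name is illustrative):

    def _check_pti_rules(timebucket_duration=None, columns_list=None, seq_max=None):
        # seq_max, when given, must lie in the signed 32-bit range starting at 1.
        if seq_max is not None and (seq_max < 1 or seq_max > 2147483647):
            raise ValueError("seq_max must be between 1 and 2147483647")
        if isinstance(columns_list, str):
            columns_list = [columns_list]
        # At least one of timebucket_duration and columns_list must be supplied.
        if timebucket_duration is None and not columns_list:
            raise ValueError("specify timebucket_duration and/or columns_list")

    _check_pti_rules(timebucket_duration="HOURS(2)")       # passes
    _check_pti_rules(columns_list="event", seq_max=20000)  # passes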
858
- def _validate_columns_list(df, df_columns, columns_list):
859
- """
860
- Internal function to validate columns list specified when creating a
861
- Primary Time Index (PTI) table.
862
-
863
- PARAMETERS:
864
- df:
865
- Name of the DataFrame to which the column being validated
866
- does or should belong.
867
-
868
- df_columns:
869
- List of columns in the DataFrame.
870
-
871
- columns_list:
872
- The column or list of columns.
873
- Type: String or list of Strings
874
-
875
- RETURNS:
876
- True if the column or list of columns is valid.
877
-
878
- RAISES:
879
- Raise TeradataMlException on validation failure.
880
- """
881
- if columns_list is None:
882
- return True
883
-
884
- # Normalize a single column name to a list
885
- if isinstance(columns_list, str):
886
- columns_list = [columns_list]
887
-
888
- for col in columns_list:
889
- _validate_column_in_list_of_columns(df, df_columns, col, 'columns_list')
890
-
891
- return True
892
-
893
-
894
- def _validate_column_in_list_of_columns(df, df_columns, col, col_arg):
895
- """
896
- Internal function to validate the arguments used to specify
897
- a column name in DataFrame.
898
-
899
- PARAMETERS:
900
- df:
901
- Name of the DataFrame to which the column being validated
902
- does or should belong.
903
-
904
- df_columns:
905
- List of columns in the DataFrame.
906
-
907
- col:
908
- Column to be validated.
909
-
910
- col_arg:
911
- Name of argument used to specify the column name.
912
-
913
- RETURNS:
914
- True, if the column name is valid.
915
-
916
- RAISES:
917
- TeradataMlException if invalid column name.
918
- """
919
- if col not in df_columns:
920
- raise TeradataMlException(
921
- Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(col,
922
- col_arg,
923
- df,
924
- 'DataFrame'),
925
- MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND)
926
-
927
- return True
928
-
929
-
930
- def _validate_column_type(df, col, col_arg, expected_types, types = None, index = False, index_label = None):
931
- """
932
- Internal function to validate the type of an input DataFrame column against
933
- a list of expected types.
934
-
935
- PARAMETERS
936
- df:
937
- Input DataFrame (Pandas or teradataml) which has the column to be tested
938
- for type.
939
-
940
- col:
941
- The column in the input DataFrame to be tested for type.
942
-
943
- col_arg:
944
- The name of the argument used to pass the column name.
945
-
946
- expected_types:
947
- Specifies a list of teradatasqlachemy datatypes that the column is
948
- expected to be of type.
949
-
950
- types:
951
- Dictionary specifying column-name to teradatasqlalchemy type-mapping.
952
-
953
- RETURNS:
954
- True, when the column is of an expected type.
955
-
956
- RAISES:
957
- TeradataMlException, when the column is not one of the expected types.
958
-
959
- EXAMPLES:
960
- _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES, types)
961
- """
962
- # Check if the column is being translated to a valid type
963
- if types is not None and col in types:
964
- if not any(isinstance(types[col], expected_type) for expected_type in expected_types):
965
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
966
- format(col_arg, types[col], ' or '.join(expected_type.__visit_name__
967
- for expected_type in expected_types)),
968
- MessageCodes.INVALID_COLUMN_TYPE)
969
- # Else we need to copy without any casting
970
- elif isinstance(df, pd.DataFrame):
971
- t = _get_sqlalchemy_mapping(str(df.dtypes[col]))
972
- if t not in expected_types:
973
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
974
- format(col_arg, t, ' or '.join(expected_type.__visit_name__
975
- for expected_type in expected_types)),
976
- MessageCodes.INVALID_COLUMN_TYPE)
977
- elif not any(isinstance(df[col].type, t) for t in expected_types):
978
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
979
- format(col_arg, df[col].type, ' or '.join(expected_type.__visit_name__
980
- for expected_type in expected_types)),
981
- MessageCodes.INVALID_COLUMN_TYPE)
982
-
983
- return True
984
-
985
-
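Note the asymmetry in how expected types are tested above: values from the user-supplied types dict are checked with isinstance (they are usually type instances), while dtype-derived defaults are checked by membership. A small sketch of the same pattern, assuming teradatasqlalchemy is installed:

    from teradatasqlalchemy import TIMESTAMP, DATE, INTEGER

    expected_types = (TIMESTAMP, DATE)

    # Instance check, as applied to values from the user-supplied types dict.
    print(any(isinstance(TIMESTAMP(), t) for t in expected_types))  # True
    print(any(isinstance(INTEGER(), t) for t in expected_types))    # False

    # Class-membership check, as applied to dtype-derived default types.
    print(TIMESTAMP in expected_types)                              # True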
986
- def _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table, types, index=None,
987
- index_label=None):
988
- """
989
- This is an internal function used to construct a SQLAlchemy Table Object.
990
- This function checks appropriate flags and supports creation of Teradata
991
- specific Table constructs such as Volatile/Primary Index tables.
992
-
993
-
994
- PARAMETERS:
995
- df:
996
- The teradataml or Pandas DataFrame object to be saved.
997
-
998
- table_name:
999
- Name of SQL table.
1000
-
1001
- con:
1002
- A SQLAlchemy connectable (engine/connection) object
1003
-
1004
- primary_index:
1005
- Creates Teradata Table(s) with Primary index column if specified.
1006
-
1007
- temporary:
1008
- Flag specifying whether SQL table to be created is Volatile or not.
1009
-
1010
- schema_name:
1011
- Specifies the name of the SQL schema in the database to write to.
1012
-
1013
- set_table:
1014
- A flag specifying whether to create a SET table or a MULTISET table.
1015
- When True, an attempt to create a SET table is made.
1016
- When False, an attempt to create a MULTISET table is made.
1017
-
1018
- types:
1019
- Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
1020
-
1021
- index:
1022
- Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
1023
-
1024
- index_label:
1025
- Column label(s) for index column(s).
1026
-
1027
- RETURNS:
1028
- SQLAlchemy Table
1029
-
1030
- RAISES:
1031
- N/A
1032
-
1033
- EXAMPLES:
1034
- _create_table_object(df = my_df, table_name = 'test_table', con = tdconnection, primary_index = None,
1035
- temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
1036
- _create_table_object(df = csv_filepath, table_name = 'test_table', con = tdconnection, primary_index = None,
1037
- temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
1038
- """
1039
- # Dictionary to append special flags, can be extended to add Fallback, Journalling, Log etc.
1040
- post_params = {}
1041
- prefix = []
1042
- pti = post(opts=post_params)
1043
-
1044
- if temporary is True:
1045
- pti = pti.on_commit(option='preserve')
1046
- prefix.append('VOLATILE')
1047
-
1048
- if not set_table:
1049
- prefix.append('multiset')
1050
- else:
1051
- prefix.append('set')
1052
-
1053
- meta = MetaData()
1054
- meta.bind = con
1055
-
1056
- if isinstance(df, pd.DataFrame):
1057
- col_names, col_types = _extract_column_info(df, types, index, index_label)
1058
- elif isinstance(df, str):
1059
- col_names, col_types = _extract_column_info(df, types)
1060
- else:
1061
- col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
1062
- if types is not None:
1063
- # Use the user-specified type when provided; keep the default for columns without one.
1064
- col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
1065
-
1066
- if primary_index is not None:
1067
- if isinstance(primary_index, list):
1068
- pti = pti.primary_index(unique=False, cols=primary_index)
1069
- elif isinstance(primary_index, str):
1070
- pti = pti.primary_index(unique=False, cols=[primary_index])
1071
- else:
1072
- pti = pti.no_primary_index()
1073
-
1074
- # Create default Table construct with parameter dictionary
1075
- table = Table(table_name, meta,
1076
- *(Column(col_name, col_type)
1077
- for col_name, col_type in
1078
- zip(col_names, col_types)),
1079
- teradatasql_post_create=pti,
1080
- prefixes=prefix,
1081
- schema=schema_name
1082
- )
1083
-
1084
- return table
1085
-
1086
-
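Condensed, the construction above reduces to a few calls against the same teradatasqlalchemy hooks; a sketch with illustrative table and column names:

    from sqlalchemy import MetaData, Table, Column
    from teradatasqlalchemy import INTEGER, VARCHAR
    from teradatasqlalchemy.dialect import TDCreateTablePost as post

    meta = MetaData()
    # MULTISET table with a non-unique primary index on 'id'.
    opts = post(opts={}).primary_index(unique=False, cols=['id'])
    demo = Table('demo_tbl', meta,
                 Column('id', INTEGER),
                 Column('name', VARCHAR(100)),
                 teradatasql_post_create=opts,
                 prefixes=['multiset'])
    # demo.create(bind=engine) would then emit roughly:
    #   CREATE MULTISET TABLE demo_tbl (id INTEGER, name VARCHAR(100)) PRIMARY INDEX (id)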
1087
- def _create_pti_table_object(df, con, table_name, schema_name, temporary, primary_time_index_name,
1088
- timecode_column, timezero_date, timebucket_duration,
1089
- sequence_column, seq_max, columns_list, set_table, types, index=None, index_label=None):
1090
- """
1091
- This is an internal function used to construct a SQLAlchemy Table Object.
1092
- This function checks appropriate flags and supports creation of Teradata
1093
- specific Table constructs such as Volatile and Primary Time Index tables.
1094
-
1095
- PARAMETERS:
1096
- df:
1097
- The teradataml or Pandas DataFrame object to be saved.
1098
-
1099
- con:
1100
- A SQLAlchemy connectable (engine/connection) object
1101
-
1102
- table_name:
1103
- Name of SQL table.
1104
-
1105
- schema_name:
1106
- Specifies the name of the SQL schema in the database to write to.
1107
-
1108
- temporary:
1109
- Flag specifying whether SQL table to be created is Volatile or not.
1110
-
1111
- primary_time_index_name:
1112
- A name for the Primary Time Index (PTI).
1113
-
1114
- timecode_column:
1115
- The column in the DataFrame that reflects the form of the timestamp
1116
- data in the time series.
1117
-
1118
- timezero_date:
1119
- Specifies the earliest time series data that the PTI table will accept.
1120
-
1121
- timebucket_duration:
1122
- A duration that serves to break up the time continuum in
1123
- the time series data into discrete groups or buckets.
1124
-
1125
- sequence_column:
1126
- Specifies a column with sequences implying that time series data
1127
- readings are not unique. If not specified, the time series data are
1128
- assumed to be unique.
1129
-
1130
- seq_max:
1131
- Specifies the maximum number of sensor data rows that can have the
1132
- same timestamp. Can be used when 'sequenced' is True.
1133
-
1134
- columns_list:
1135
- A list of one or more PTI table column names.
1136
-
1137
- set_table:
1138
- A flag specifying whether to create a SET table or a MULTISET table.
1139
- When True, an attempt to create a SET table is made.
1140
- When False, an attempt to create a MULTISET table is made.
1141
-
1142
- types:
1143
- Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
1144
-
1145
- index:
1146
- Flag specifying whether to write Pandas DataFrame index as a column or not.
1147
-
1148
- index_label:
1149
- Column label for index column(s).
1150
-
1151
- RETURNS:
1152
- SQLAlchemy Table
1153
-
1154
- RAISES:
1155
- N/A
1156
-
1157
- EXAMPLES:
1158
- _create_pti_table_object(df = my_df, table_name = 'test_table', con = tdconnection,
1159
- timecode_column = 'ts', columns_list = ['user_id', 'location'])
1160
-
1161
- """
1162
- meta = MetaData()
1163
-
1164
- if isinstance(df, pd.DataFrame):
1165
- col_names, col_types = _extract_column_info(df, types, index, index_label)
1166
- timecode_datatype = col_types[col_names.index(timecode_column)]()
1167
- else:
1168
- col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
1169
- if types is not None:
1170
- # Use the user-specified type when provided; keep the default for columns without one.
1171
- col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
1172
- timecode_datatype = df[timecode_column].type
1173
-
1174
- # Remove timecode and sequence column from col_name and col_types
1175
- # since the required columns will be created automatically
1176
- if timecode_column in col_names:
1177
- ind = col_names.index(timecode_column)
1178
- col_names.pop(ind)
1179
- col_types.pop(ind)
1180
-
1181
- if sequence_column is not None and sequence_column in col_names:
1182
- ind = col_names.index(sequence_column)
1183
- col_names.pop(ind)
1184
- col_types.pop(ind)
1185
-
1186
- # Dictionary to append special flags, can be extended to add Fallback, Journalling, Log etc.
1187
- post_params = {}
1188
- prefix = []
1189
- pti = post(opts=post_params)
1190
-
1191
- # Create Table object with appropriate Primary Time Index/Prefix for volatile
1192
- if temporary:
1193
- pti = pti.on_commit(option='preserve')
1194
- prefix.append('VOLATILE')
1195
-
1196
- if not set_table:
1197
- prefix.append('multiset')
1198
- else:
1199
- prefix.append('set')
1200
-
1201
- pti = pti.primary_time_index(timecode_datatype,
1202
- name=primary_time_index_name,
1203
- timezero_date=timezero_date,
1204
- timebucket_duration=timebucket_duration,
1205
- sequenced=True if sequence_column is not None else False,
1206
- seq_max=seq_max,
1207
- cols=columns_list)
1208
-
1209
- table = Table(table_name, meta,
1210
- *(Column(col_name, col_type)
1211
- for col_name, col_type in
1212
- zip(col_names, col_types)),
1213
- teradatasql_post_create=pti,
1214
- prefixes=prefix,
1215
- schema=schema_name
1216
- )
1217
-
1218
- return table
1219
-
1220
-
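The PTI variant differs from _create_table_object only in the post-create options; a sketch along the same lines (names illustrative). The timecode and sequence columns are left out of the column list because the database generates TD_TIMECODE and TD_SEQNO:

    from sqlalchemy import MetaData, Table, Column
    from teradatasqlalchemy import TIMESTAMP, INTEGER, FLOAT
    from teradatasqlalchemy.dialect import TDCreateTablePost as post

    meta = MetaData()
    opts = post(opts={}).primary_time_index(TIMESTAMP(),
                                            name='demo_pti',
                                            timezero_date="DATE '2020-01-01'",
                                            timebucket_duration='HOURS(2)',
                                            sequenced=False,
                                            seq_max=None,
                                            cols=['sensor_id'])
    demo = Table('demo_pti_tbl', meta,
                 Column('sensor_id', INTEGER),
                 Column('reading', FLOAT),
                 teradatasql_post_create=opts,
                 prefixes=['multiset'])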
1221
- def _rename_column(col_names, search_for, rename_to):
1222
- """
1223
- Internal function to rename a column in a list of columns of a Pandas DataFrame.
1224
-
1225
- PARAMETERS:
1226
- col_names:
1227
- Required Argument.
1228
- The list of column names of the Pandas DataFrame.
1229
-
1230
- search_for:
1231
- Required Argument.
1232
- The column name that need to be changed/renamed.
1233
-
1234
- rename_to:
1235
- Required Argument.
1236
- The column name that the 'search_for' column needs to be replaced with.
1237
-
1238
- RETURNS:
1239
- A list of renamed columns list.
1240
-
1241
- EXAMPLES:
1242
- cols = _rename_column(cols, 'col_1', 'new_col_1')
1243
- """
1244
- ind = col_names.index(search_for)
1245
- col_names.pop(ind)
1246
- col_names.insert(ind, rename_to)
1247
-
1248
- return col_names
1249
-
1250
-
1251
- def _rename_to_pti_columns(col_names, timecode_column, sequence_column,
1252
- timecode_column_index=None, sequence_column_index=None):
1253
- """
1254
- Internal function to generate a list of renamed columns of a Pandas DataFrame to match that of the PTI table column names
1255
- in Vantage, or revert any such changes made.
1256
-
1257
- PARAMETERS:
1258
- col_names:
1259
- The list of column names of the Pandas DataFrame.
1260
-
1261
- timecode_column:
1262
- The column name that reflects the timecode column in the PTI table.
1263
-
1264
- sequence_column:
1265
- The column name that reflects the sequence column in the PTI table.
1266
-
1267
- timecode_column_index:
1268
- The index of the timecode column. When specified, it indicates that a reverse renaming operation is to be
1269
- performed.
1270
-
1271
- sequence_column_index:
1272
- The index of the sequence column. When specified, it indicates that a reverse renaming operation is to be
1273
- performed.
1274
-
1275
- RETURNS:
1276
- A list of renamed PTI related columns.
1277
-
1278
- EXAMPLES:
1279
- cols = _rename_to_pti_columns(cols, timecode_column, sequence_column, timecode_column_index=t_index, sequence_column_index=s_index)
1280
- cols = _rename_to_pti_columns(cols, timecode_column, sequence_column)
1281
- """
1282
- # Rename the timecode_column to what it is in Vantage
1283
- if timecode_column_index is not None:
1284
- col_names = _rename_column(col_names, PTITableConstants.TD_TIMECODE.value, timecode_column)
1285
- else:
1286
- col_names = _rename_column(col_names, timecode_column, PTITableConstants.TD_TIMECODE.value)
1287
-
1288
- # Rename the sequence_column to what it is in Vantage
1289
- if sequence_column is not None:
1290
- if sequence_column_index is not None:
1291
- col_names = _rename_column(col_names, PTITableConstants.TD_SEQNO.value, sequence_column)
1292
- else:
1293
- col_names = _rename_column(col_names, sequence_column, PTITableConstants.TD_SEQNO.value)
1294
-
1295
- return col_names
1296
-
1297
-
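Assuming the constants resolve to 'TD_TIMECODE' and 'TD_SEQNO' (the generated column names in Vantage), the forward and reverse renames round-trip. A standalone sketch of the same list surgery:

    TD_TIMECODE, TD_SEQNO = 'TD_TIMECODE', 'TD_SEQNO'

    def rename(cols, search_for, rename_to):
        # Same pop/insert trick as _rename_column: swap the name, keep the position.
        i = cols.index(search_for)
        cols[i] = rename_to
        return cols

    cols = ['ts', 'seq', 'event']
    cols = rename(rename(cols, 'ts', TD_TIMECODE), 'seq', TD_SEQNO)
    print(cols)  # ['TD_TIMECODE', 'TD_SEQNO', 'event']
    cols = rename(rename(cols, TD_TIMECODE, 'ts'), TD_SEQNO, 'seq')
    print(cols)  # ['ts', 'seq', 'event'] -- the reverse rename restores the input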
1298
- def _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column, df_col_type_list = None):
1299
- """
1300
- Internal function to reorder the list of columns used to construct the 'INSERT INTO'
1301
- statement as required when the target table is a PTI table.
1302
-
1303
- PARAMETERS:
1304
- df_column_list:
1305
- A list of column names for the columns in the DataFrame.
1306
-
1307
- timecode_column:
1308
- The timecode column, which should be moved to the first position.
1309
-
1310
- sequence_column:
1311
- The sequence column, which should be moved to the second position.
1312
-
1313
- df_col_type_list:
1314
- Optionally reorder the list containing the types of the columns to match the
1315
- reordering of df_column_list.
1316
-
1317
- RETURNS:
1318
- A reordered list of columns names for the columns in the DataFrame.
1319
- If the optional types list is also specified, then a tuple of the list reordered columns names
1320
- and the list of the column types.
1321
-
1322
- EXAMPLE:
1323
- new_colname_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
1324
- new_colname_list, new_type_list = _reorder_insert_list_for_pti(df_column_list, timecode_column,
1325
- sequence_column, df_col_type_list)
1326
- """
1327
- # Reposition timecode (to the first) and sequence column (to the second)
1328
- # in df_column_list
1329
- timecode_column_index = df_column_list.index(timecode_column)
1330
- df_column_list.insert(0, df_column_list.pop(timecode_column_index))
1331
- if df_col_type_list is not None:
1332
- df_col_type_list.insert(0, df_col_type_list.pop(timecode_column_index))
1333
-
1334
- if sequence_column is not None:
1335
- sequence_column_index = df_column_list.index(sequence_column)
1336
- df_column_list.insert(1, df_column_list.pop(sequence_column_index))
1337
- if df_col_type_list is not None:
1338
- df_col_type_list.insert(1, df_col_type_list.pop(sequence_column_index))
1339
-
1340
- if df_col_type_list is not None:
1341
- return df_column_list, df_col_type_list
1342
- else:
1343
- return df_column_list
1344
-
1345
-
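The effect of the reordering is easiest to see on a literal list; a quick sketch with illustrative column names:

    cols = ['temperature', 'ts', 'humidity', 'seq']
    # Timecode column moves to the front ...
    cols.insert(0, cols.pop(cols.index('ts')))
    # ... and the sequence column to the second slot.
    cols.insert(1, cols.pop(cols.index('seq')))
    print(cols)  # ['ts', 'seq', 'temperature', 'humidity']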
1346
- def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_pandas_df=False,
1347
- is_pti=False, timecode_column=None, sequence_column=None):
1348
- """
1349
- Internal function used to extract column information from two lists of SQLAlchemy ColumnExpression objects;
1350
- and check if the number of columns and their names are matching to determine table insertion compatibility.
1351
-
1352
- PARAMETERS:
1353
- table1_col_object:
1354
- Specifies a list/collection of SQLAlchemy ColumnExpression Objects for first table.
1355
-
1356
- table2_cols:
1357
- Specifies a list of column names for second table (teradataml DataFrame).
1358
-
1359
- is_pandas_df:
1360
- Flag specifying whether the table objects to check are pandas DataFrames or not
1361
- Default: False
1362
- Note: When this flag is True, table2_cols is passed as a tuple object of
1363
- ([column_names], [column_types])
1364
-
1365
- is_pti:
1366
- Boolean flag indicating if the target table is a PTI table.
1367
-
1368
- timecode_column:
1369
- timecode_column required to order the select expression for the insert.
1370
- It should be the first column in the select expression.
1371
-
1372
- sequence_column:
1373
- sequence_column required to order the select expression for the insert.
1374
- It should be the second column in the select expression.
1375
-
1376
-
1377
- RETURNS:
1378
- a) True, when insertion compatible (number of columns and their names match)
1379
- b) False, otherwise
1380
-
1381
- RAISES:
1382
- N/A
1383
-
1384
- EXAMPLES:
1385
- _check_columns_insertion_compatible(table1.c, ['co1', 'col2'], False)
1386
- _check_columns_insertion_compatible(table1.c, (['co1', 'col2'], [int, str]), True, True, 'ts', 'seq')
1387
-
1388
- """
1389
- table1_col_names, _ = UtilFuncs._extract_table_object_column_info(table1_col_object)
1390
- table2_col_names = table2_cols[0] if is_pandas_df else table2_cols
1391
-
1392
- # Check for number of columns
1393
- if len(table1_col_names) != len(table2_col_names):
1394
- return False
1395
-
1396
- if is_pti is True:
1397
- # Reposition timecode (to the first) and sequence column (to the second)
1398
- # with their names as generated by the database, in col_name since that
1399
- # is the default position of the columns.
1400
- table2_col_names = _reorder_insert_list_for_pti(table2_col_names, timecode_column, sequence_column)
1401
- table2_col_names = _rename_to_pti_columns(table2_col_names, timecode_column, sequence_column)
1402
-
1403
- # Check for the column names
1404
- for i in range(len(table1_col_names)):
1405
- if table1_col_names[i] != table2_col_names[i]:
1406
- return False
1407
-
1408
- # Number of columns and their names in both List of ColumnExpressions match
1409
- return True
1410
-
1411
-
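Stripped of the PTI reshuffling and renaming, the compatibility test is a positional name comparison; a sketch:

    def insertion_compatible(table_cols, df_cols):
        # Same count and the same names, position by position.
        if len(table_cols) != len(df_cols):
            return False
        return all(a == b for a, b in zip(table_cols, df_cols))

    print(insertion_compatible(['id', 'name'], ['id', 'name']))  # True
    print(insertion_compatible(['id', 'name'], ['name', 'id']))  # False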
1412
- def _extract_column_info(df, types = None, index = False, index_label = None):
1413
- """
1414
- This is an internal function used to extract column information for a DF,
1415
- and map to user-specified teradatasqlalchemy types, if specified,
1416
- for Table creation.
1417
-
1418
- PARAMETERS:
1419
- df:
1420
- The Pandas DataFrame object to be saved.
1421
-
1422
- types:
1423
- A python dictionary with column names and required types as key-value pairs.
1424
-
1425
- index:
1426
- Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
1427
-
1428
- index_label:
1429
- Column label(s) for index column(s).
1430
-
1431
- RETURNS:
1432
- A tuple with the following elements:
1433
- a) List of DataFrame Column names
1434
- b) List of equivalent teradatasqlalchemy column types
1435
-
1436
- RAISES:
1437
- None
1438
-
1439
- EXAMPLES:
1440
- _extract_column_info(df = my_df)
1441
- _extract_column_info(df = my_df, types = {'id_col': INTEGER})
1442
-
1443
- """
1444
- if isinstance(df, str):
1445
- return list(types.keys()), list(types.values())
1446
-
1447
- col_names = _get_pd_df_column_names(df)
1448
-
1449
- # If the datatype is not specified then check if the datatype is datetime64 and timezone is present then map it to
1450
- # TIMESTAMP(timezone=True) else map it according to default value.
1451
- col_types = [types.get(col_name) if types and col_name in types else
1452
- TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
1453
- and (df[col_name].dt.tz is not None)
1454
- else _get_sqlalchemy_mapping(str(df.dtypes[key]))
1455
- for key, col_name in enumerate(list(df.columns))]
1456
-
1457
- ind_names = []
1458
- ind_types = []
1459
- if index:
1460
- ind_names, ind_types = _get_index_labels(df, index_label)
1461
- ind_types = [types.get(ind_name) if types and ind_name in types
1462
- else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
1463
- and (df[ind_name].dt.tz is not None)
1464
- else _get_sqlalchemy_mapping(str(ind_types[key]))
1465
- for key, ind_name in enumerate(ind_names)]
1466
-
1467
- return col_names + ind_names, col_types + ind_types
1468
-
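For a concrete pandas DataFrame, the dtype strings driving the default mapping come out as below (the right-hand comments show the resulting teradatasqlalchemy types per the table in the copy_to_sql docstring):

    import pandas as pd

    df = pd.DataFrame({'emp_id': [1, 2],
                       'marks': [99.9, 97.3],
                       'emp_name': ['A1', 'A2'],
                       'joined': pd.to_datetime(['2020-01-01', '2020-06-01'])})
    for col in df.columns:
        print(col, str(df.dtypes[col]))
    # emp_id   int64           -> BIGINT
    # marks    float64         -> FLOAT
    # emp_name object          -> VARCHAR(configure.default_varchar_size)
    # joined   datetime64[ns]  -> TIMESTAMP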
1469
- def _insert_from_dataframe(df, con, schema_name, table_name, index, chunksize,
1470
- is_pti=False, timecode_column=None, sequence_column=None,
1471
- match_column_order=True):
1472
- """
1473
- This is an internal function used to sequentially extract column info from DF,
1474
- iterate rows, and insert rows manually.
1475
- Used for Insertions to Temporary Tables & Tables with Pandas index.
1476
-
1477
- This uses DBAPI's executeMany() which is a batch insertion method.
1478
-
1479
- PARAMETERS:
1480
- df:
1481
- The Pandas DataFrame object to be saved.
1482
-
1483
- con:
1484
- A SQLAlchemy connectable (engine/connection) object
1485
-
1486
- schema_name:
1487
- Name of the schema.
1488
-
1489
- table_name:
1490
- Name of the table.
1491
-
1492
- index:
1493
- Flag specifying whether to write Pandas DataFrame index as a column or not.
1494
-
1495
- chunksize:
1496
- Specifies the number of rows to be loaded in a batch.
1497
- Note:
1498
- This argument is used only when argument "df" is a pandas DataFrame.
1499
-
1500
- is_pti:
1501
- Boolean flag indicating if the table should be a PTI table.
1502
-
1503
- timecode_column:
1504
- timecode_column required to order the select expression for the insert.
1505
- It should be the first column in the select expression.
1506
-
1507
- sequence_column:
1508
- sequence_column required to order the select expression for the insert.
1509
- It should be the second column in the select expression.
1510
-
1511
- match_column_order:
1512
- Specifies whether the order of the df to be loaded matches the order of the
1513
- existing df or not.
1514
-
1515
- RETURNS:
1516
- N/A
1517
-
1518
- RAISES:
1519
- N/A
1520
-
1521
- EXAMPLES:
1522
- _insert_from_dataframe(df = my_df, con = tdconnection, schema = None, table_name = 'test_table',
1523
- index = True, index_label = None)
1524
- """
1525
- col_names = _get_pd_df_column_names(df)
1526
-
1527
- # Quoted, schema-qualified table name
1528
- table = '"{}"'.format(table_name)
1529
- if schema_name is not None:
1530
- table = '"{}".{}'.format(schema_name, table_name)
1531
-
1532
- try:
1533
-
1534
- if is_pti:
1535
- # This is for non-index columns.
1536
- col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)
1537
-
1538
- is_multi_index = isinstance(df.index, pd.MultiIndex)
1539
-
1540
- insert_list = []
1541
-
1542
- if not match_column_order:
1543
- ins = "INSERT INTO {} {} VALUES {};".format(
1544
- table,
1545
- '(' + ', '.join(col_names) + ')',
1546
- '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
1547
- if index is True else len(col_names))]) + ')')
1548
- else:
1549
- ins = "INSERT INTO {} VALUES {};".format(
1550
- table,
1551
- '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
1552
- if index is True else len(col_names))]) + ')')
1553
-
1554
- rowcount = 0
1555
- # Iterate rows of DataFrame over new re-ordered columns
1556
- for row_index, row in enumerate(df[col_names].itertuples(index=True)):
1557
- ins_dict = ()
1558
- for col_index, x in enumerate(col_names):
1559
- ins_dict = ins_dict + (row[col_index+1],)
1560
-
1561
- if index is True:
1562
- ins_dict = ins_dict + row[0] if is_multi_index else ins_dict + (row[0],)
1563
-
1564
- insert_list.append(ins_dict)
1565
- rowcount = rowcount + 1
1566
-
1567
- # dbapi_batchsize corresponds to the max batch size for the DBAPI driver.
1568
- # Insert the rows once the batch-size reaches the max allowed.
1569
- if rowcount == chunksize:
1570
- # Batch Insertion (using DBAPI's executeMany) used here to insert list of dictionaries
1571
- cur = execute_sql(ins, insert_list)
1572
- if cur is not None:
1573
- cur.close()
1574
- rowcount = 0
1575
- insert_list.clear()
1576
-
1577
- # Insert any remaining rows.
1578
- if rowcount > 0:
1579
- cur = execute_sql(ins, insert_list)
1580
- if cur is not None:
1581
- cur.close()
1582
-
1583
- except Exception:
1584
- raise
1585
-
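The chunking logic above amounts to: buffer rows, flush every chunksize rows, then flush the remainder. A self-contained sketch with print standing in for the execute_sql batch call:

    rows = [(i,) for i in range(10)]
    chunksize = 4
    batch = []
    for row in rows:
        batch.append(row)
        if len(batch) == chunksize:
            print('flush', batch)  # stands in for execute_sql(ins, batch)
            batch.clear()
    if batch:
        print('flush', batch)      # remaining partial batch
    # flush [(0,), (1,), (2,), (3,)]
    # flush [(4,), (5,), (6,), (7,)]
    # flush [(8,), (9,)]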
1586
- def _get_pd_df_column_names(df):
1587
- """
1588
- Internal function to return the names of columns in a Pandas DataFrame.
1589
-
1590
- PARAMETERS
1591
- df:
1592
- The Pandas DataFrame to fetch the column names for.
1593
-
1594
- RETURNS:
1595
- A list of Strings
1596
-
1597
- RAISES:
1598
- None
1599
-
1600
- EXAMPLES:
1601
- _get_pd_df_column_names(df = my_df)
1602
- """
1603
- return df.columns.tolist()
1604
-
1605
- def _get_sqlalchemy_mapping(key):
1606
- """
1607
- This is an internal function used to return a SQLAlchemy Type Mapping
1608
- for a given Pandas DataFrame column Type.
1609
- Used for Table Object creation internally based on DF column info.
1610
-
1611
- For an unknown key, String (mapping to VARCHAR) is returned.
1612
-
1613
- PARAMETERS:
1614
- key : String representing Pandas type ('int64', 'object' etc.)
1615
-
1616
- RETURNS:
1617
- SQLAlchemy Type (Integer, String, Float, DateTime etc.)
1618
-
1619
- RAISES:
1620
- N/A
1621
-
1622
- EXAMPLES:
1623
- _get_sqlalchemy_mapping(key = 'int64')
1624
- """
1625
- teradata_types_map = _get_all_sqlalchemy_mappings()
1626
-
1627
- if key in teradata_types_map.keys():
1628
- return teradata_types_map.get(key)
1629
- else:
1630
- return VARCHAR(configure.default_varchar_size,charset='UNICODE')
1631
-
1632
-
1633
- def _get_all_sqlalchemy_mappings():
1634
- """
1635
- This is an internal function used to return a dictionary of all SQLAlchemy Type Mappings.
1636
- It contains mappings from pandas data types to SQLAlchemy types.
1637
-
1638
- PARAMETERS:
1639
-
1640
- RETURNS:
1641
- dictionary { pandas_type : SQLAlchemy Type}
1642
-
1643
- RAISES:
1644
- N/A
1645
-
1646
- EXAMPLES:
1647
- _get_all_sqlalchemy_mappings()
1648
- """
1649
- teradata_types_map = {'int32':INTEGER, 'int64':BIGINT,
1650
- 'object':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1651
- 'O':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1652
- 'float64':FLOAT, 'float32':FLOAT, 'bool':BYTEINT,
1653
- 'datetime64':TIMESTAMP, 'datetime64[ns]':TIMESTAMP,
1654
- 'datetime64[ns, UTC]':TIMESTAMP(timezone=True),
1655
- 'timedelta64[ns]':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1656
- 'timedelta[ns]':VARCHAR(configure.default_varchar_size,charset='UNICODE')}
1657
-
1658
- return teradata_types_map
1659
-
1660
-
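The lookup itself is a plain dict.get with a VARCHAR fallback; a trimmed sketch in which string names stand in for the teradatasqlalchemy classes:

    mapping = {'int32': 'INTEGER', 'int64': 'BIGINT', 'bool': 'BYTEINT',
               'float32': 'FLOAT', 'float64': 'FLOAT',
               'datetime64[ns]': 'TIMESTAMP'}

    def to_td_type(pandas_dtype, default='VARCHAR'):
        # Unknown pandas dtypes (e.g. 'object') fall back to VARCHAR.
        return mapping.get(pandas_dtype, default)

    print(to_td_type('int64'))   # BIGINT
    print(to_td_type('object'))  # VARCHAR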
1661
- def _validate_timezero_date(timezero_date):
1662
- """
1663
- Internal function to validate timezero_date specified when creating a
1664
- Primary Time Index (PTI) table.
1665
-
1666
- PARAMETERS:
1667
- timezero_date:
1668
- The timezero_date passed to primary_time_index().
1669
-
1670
- RETURNS:
1671
- True if the value is valid.
1672
-
1673
- RAISES:
1674
- ValueError when the value is invalid.
1675
-
1676
- EXAMPLE:
1677
- _validate_timezero_date("DATE '2011-01-01'")
1678
- _validate_timezero_date('2011-01-01') # Invalid
1679
- """
1680
- # Return True if it is not specified (is None) since it is optional
1681
- if timezero_date is None:
1682
- return True
1683
-
1684
- pattern = re.compile(PTITableConstants.PATTERN_TIMEZERO_DATE.value)
1685
- match = pattern.match(timezero_date)
1686
-
1687
- err_msg = Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(timezero_date,
1688
- 'timezero_date',
1689
- "str of format DATE 'YYYY-MM-DD'")
1690
-
1691
- try:
1692
- datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
1693
- except (ValueError, AttributeError):
1694
- raise TeradataMlException(err_msg,
1695
- MessageCodes.INVALID_ARG_VALUE)
1696
-
1697
- # Looks like the value is valid
1698
- return True
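A standalone sketch of the same two-stage check; the regex below is an assumed equivalent of PTITableConstants.PATTERN_TIMEZERO_DATE, and a plain ValueError stands in for TeradataMlException:

    import re
    import datetime

    def check_timezero(value):
        # Assumed pattern; the real one comes from PTITableConstants.
        match = re.match(r"DATE\s+'(\d{4}-\d{2}-\d{2})'$", value)
        try:
            # AttributeError covers a failed regex match (match is None).
            datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
        except (ValueError, AttributeError):
            raise ValueError("expected str of format DATE 'YYYY-MM-DD'")
        return True

    print(check_timezero("DATE '2011-01-01'"))  # True
    # check_timezero('2011-01-01') raises: the DATE '...' wrapper is required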
1
+ #!/usr/bin/python
2
+ # ##################################################################
3
+ #
4
+ # Copyright 2018 Teradata. All rights reserved.
5
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
6
+ #
7
+ # ##################################################################
8
+
9
+ import re
10
+ import datetime
11
+ import warnings
12
+ import pandas as pd
13
+ import pandas.api.types as pt
14
+
15
+ from sqlalchemy import MetaData, Table, Column
16
+ from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
17
+ from teradatasqlalchemy import (INTEGER, BIGINT, BYTEINT, FLOAT)
18
+ from teradatasqlalchemy import (TIMESTAMP)
19
+ from teradatasqlalchemy import (VARCHAR)
20
+ from teradatasqlalchemy.dialect import TDCreateTablePost as post
21
+ from teradataml.common.aed_utils import AedUtils
22
+ from teradataml.context.context import *
23
+ from teradataml.dataframe import dataframe as tdmldf
24
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
25
+ from teradataml.dbutils.dbutils import _rename_table
26
+ from teradataml.common.utils import UtilFuncs
27
+ from teradataml.options.configure import configure
28
+ from teradataml.common.constants import CopyToConstants, PTITableConstants
29
+ from teradatasql import OperationalError
30
+ from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
31
+ from teradataml.utils.utils import execute_sql
32
+ from teradataml.utils.validators import _Validators
33
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband
34
+
35
+
36
+ @collect_queryband(queryband="CpToSql")
37
+ def copy_to_sql(df, table_name,
38
+ schema_name=None, if_exists='append',
39
+ index=False, index_label=None,
40
+ primary_index=None,
41
+ temporary=False, types = None,
42
+ primary_time_index_name = None,
43
+ timecode_column=None,
44
+ timebucket_duration = None,
45
+ timezero_date = None,
46
+ columns_list=None,
47
+ sequence_column=None,
48
+ seq_max=None,
49
+ set_table=False,
50
+ chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
51
+ match_column_order=True):
52
+ """
53
+ Writes records stored in a Pandas DataFrame or a teradataml DataFrame to Teradata Vantage.
54
+
55
+ PARAMETERS:
56
+
57
+ df:
58
+ Required Argument.
59
+ Specifies the Pandas or teradataml DataFrame object to be saved.
60
+ Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
61
+
62
+ table_name:
63
+ Required Argument.
64
+ Specifies the name of the table to be created in Vantage.
65
+ Types : String
66
+
67
+ schema_name:
68
+ Optional Argument.
69
+ Specifies the name of the SQL schema in Teradata Vantage to write to.
70
+ Types: String
71
+ Default: None (Uses default database schema).
72
+
73
+ Note: schema_name will be ignored when temporary=True.
74
+
75
+ if_exists:
76
+ Optional Argument.
77
+ Specifies the action to take when table already exists in Vantage.
78
+ Types: String
79
+ Possible values: {'fail', 'replace', 'append'}
80
+ - fail: If table exists, raise TeradataMlException.
81
+ - replace: If table exists, drop it, recreate it, and insert data.
82
+ - append: If table exists, insert data. Create if does not exist.
83
+ Default : append
84
+
85
+ Note: Replacing a table with the contents of a teradataml DataFrame based on
86
+ the same underlying table is not supported.
87
+
88
+ index:
89
+ Optional Argument.
90
+ Specifies whether to save Pandas DataFrame index as a column or not.
91
+ Types : Boolean (True or False)
92
+ Default : False
93
+
94
+ Note: Only use as True when attempting to save Pandas DataFrames (and not with teradataml DataFrames).
95
+
96
+ index_label:
97
+ Optional Argument.
98
+ Specifies the column label(s) for Pandas DataFrame index column(s).
99
+ Types : String or list of strings
100
+ Default : None
101
+
102
+ Note: If index_label is not specified (defaulted to None or is empty) and `index` is True, then
103
+ the 'names' property of the DataFrame's index is used as the label(s),
104
+ and if that too is None or empty, then:
105
+ 1) a default label 'index_label' or 'level_0' (when 'index_label' is already taken) is used
106
+ when index is standard.
107
+ 2) default labels 'level_0', 'level_1', etc. are used when index is multi-level index.
108
+
109
+ Only use as True when attempting to save Pandas DataFrames (and not on teradataml DataFrames).
110
+
111
+ primary_index:
112
+ Optional Argument.
113
+ Specifies which column(s) to use as primary index while creating Teradata table(s) in Vantage.
114
+ When None, No Primary Index Teradata tables are created.
115
+ Types : String or list of strings
116
+ Default : None
117
+ Example:
118
+ primary_index = 'my_primary_index'
119
+ primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
120
+
121
+ temporary:
122
+ Optional Argument.
123
+ Specifies whether to create Vantage tables as permanent or volatile.
124
+ Types : Boolean (True or False)
125
+ Default : False
126
+
127
+ Note: When True:
128
+ 1. volatile Tables are created, and
129
+ 2. schema_name is ignored.
130
+ When False, permanent tables are created.
131
+
132
+ types:
133
+ Optional Argument.
134
+ Specifies required data-types for requested columns to be saved in Vantage.
135
+ Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
136
+ Default: None
137
+
138
+ Note:
139
+ 1. This argument accepts a dictionary of column names and their required teradatasqlalchemy types
140
+ as key-value pairs, allowing you to specify a subset of the columns of a specific type.
141
+ i) When the input is a Pandas DataFrame:
142
+ - When only a subset of all columns are provided, the column types for the rest are assigned
143
+ appropriately.
144
+ - When types argument is not provided, the column types are assigned
145
+ as listed in the following table:
146
+ +---------------------------+-----------------------------------------+
147
+ | Pandas/Numpy Type | teradatasqlalchemy Type |
148
+ +---------------------------+-----------------------------------------+
149
+ | int32 | INTEGER |
150
+ +---------------------------+-----------------------------------------+
151
+ | int64 | BIGINT |
152
+ +---------------------------+-----------------------------------------+
153
+ | bool | BYTEINT |
154
+ +---------------------------+-----------------------------------------+
155
+ | float32/float64 | FLOAT |
156
+ +---------------------------+-----------------------------------------+
157
+ | datetime64/datetime64[ns] | TIMESTAMP |
158
+ +---------------------------+-----------------------------------------+
159
+ | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True) |
160
+ +---------------------------+-----------------------------------------+
161
+ | Any other data type | VARCHAR(configure.default_varchar_size) |
162
+ +---------------------------+-----------------------------------------+
163
+ ii) When the input is a teradataml DataFrame:
164
+ - When only a subset of all columns are provided, the column types for the rest are retained.
165
+ - When types argument is not provided, the column types are retained.
166
+ 2. This argument does not have any effect when the table specified using table_name and schema_name
167
+ exists and if_exists = 'append'.
168
+
169
+ primary_time_index_name:
170
+ Optional Argument.
171
+ Specifies a name for the Primary Time Index (PTI) when the table
172
+ to be created must be a PTI table.
173
+ Type: String
174
+
175
+ Note: This argument is not required or used when the table to be created
176
+ is not a PTI table. It will be ignored if specified without the timecode_column.
177
+
178
+ timecode_column:
179
+ Optional Argument.
180
+ Required when the DataFrame must be saved as a PTI table.
181
+ Specifies the column in the DataFrame that reflects the form
182
+ of the timestamp data in the time series.
183
+ This column will be the TD_TIMECODE column in the table created.
184
+ It should be of SQL type TIMESTAMP(n), TIMESTAMP(n) WITH TIMEZONE, or DATE,
185
+ corresponding to Python types datetime.datetime or datetime.date, or Pandas dtype datetime64[ns].
186
+ Type: String
187
+
188
+ Note: When you specify this parameter, an attempt to create a PTI table
189
+ will be made. This argument is not required when the table to be created
190
+ is not a PTI table. If this argument is specified, primary_index will be ignored.
191
+
192
+ timezero_date:
193
+ Optional Argument.
194
+ Used when the DataFrame must be saved as a PTI table.
195
+ Specifies the earliest time series data that the PTI table will accept;
196
+ a date that precedes the earliest date in the time series data.
197
+ Value specified must be of the following format: DATE 'YYYY-MM-DD'
198
+ Default Value: DATE '1970-01-01'.
199
+ Type: String
200
+
201
+ Note: This argument is not required or used when the table to be created
202
+ is not a PTI table. It will be ignored if specified without the timecode_column.
203
+
204
+ timebucket_duration:
205
+ Optional Argument.
206
+ Required if columns_list is not specified or is None.
207
+ Used when the DataFrame must be saved as a PTI table.
208
+ Specifies a duration that serves to break up the time continuum in
209
+ the time series data into discrete groups or buckets.
210
+ Specified using the formal form time_unit(n), where n is a positive
211
+ integer, and time_unit can be any of the following:
212
+ CAL_YEARS, CAL_MONTHS, CAL_DAYS, WEEKS, DAYS, HOURS, MINUTES,
213
+ SECONDS, MILLISECONDS, or MICROSECONDS.
214
+ Type: String
215
+
216
+ Note: This argument is not required or used when the table to be created
217
+ is not a PTI table. It will be ignored if specified without the timecode_column.
218
+
219
+ columns_list:
220
+ Optional Argument.
221
+ Used when the DataFrame must be saved as a PTI table.
222
+ Required if timebucket_duration is not specified.
223
+ A list of one or more PTI table column names.
224
+ Type: String or list of Strings
225
+
226
+ Note: This argument is not required or used when the table to be created
227
+ is not a PTI table. It will be ignored if specified without the timecode_column.
228
+
229
+ sequence_column:
230
+ Optional Argument.
231
+ Used when the DataFrame must be saved as a PTI table.
232
+ Specifies the column of type Integer containing the unique identifier for
233
+ time series data readings when they are not unique in time.
234
+ * When specified, implies SEQUENCED, meaning more than one reading from the same
235
+ sensor may have the same timestamp.
236
+ This column will be the TD_SEQNO column in the table created.
237
+ * When not specified, implies NONSEQUENCED, meaning there is only one sensor reading
238
+ per timestamp.
239
+ This is the default.
240
+ Type: str
241
+
242
+ Note: This argument is not required or used when the table to be created
243
+ is not a PTI table. It will be ignored if specified without the timecode_column.
244
+
245
+ seq_max:
246
+ Optional Argument.
247
+ Used when the DataFrame must be saved as a PTI table.
248
+ Specifies the maximum number of sensor data rows that can have the
249
+ same timestamp. Can be used when 'sequenced' is True.
250
+ Accepted range: 1 - 2147483647.
251
+ Default Value: 20000.
252
+ Type: int
253
+
254
+ Note: This argument is not required or used when the table to be created
255
+ is not a PTI table. It will be ignored if specified without the timecode_column.
256
+
257
+ set_table:
258
+ Optional Argument.
259
+ Specifies a flag to determine whether to create a SET or a MULTISET table.
260
+ When True, a SET table is created.
261
+ When False, a MULTISET table is created.
262
+ Default Value: False
263
+ Type: boolean
264
+
265
+ Note: 1. Specifying set_table=True also requires specifying primary_index or timecode_column.
266
+ 2. Creating SET table (set_table=True) may result in
267
+ a. an error if the source is a Pandas DataFrame having duplicate rows.
268
+ b. loss of duplicate rows if the source is a teradataml DataFrame.
269
+ 3. This argument has no effect if the table already exists and if_exists='append'.
270
+
271
+ chunksize:
272
+ Optional Argument.
273
+ Specifies the number of rows to be loaded in a batch.
274
+ Note:
275
+ This argument is used only when argument "df" is a pandas DataFrame.
276
+ Default Value: 16383
277
+ Types: int
278
+
279
+ match_column_order:
280
+ Optional Argument.
281
+ Specifies whether the order of the columns in existing table matches the order of
282
+ the columns in the "df" or not. When set to False, the dataframe to be loaded can
283
+ have any order and number of columns.
284
+ Default Value: True
285
+ Types: bool
286
+
287
+ RETURNS:
288
+ None
289
+
290
+ RAISES:
291
+ TeradataMlException
292
+
293
+ EXAMPLES:
294
+ 1. Saving a Pandas DataFrame:
295
+
296
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
297
+ >>> from teradatasqlalchemy.types import *
298
+
299
+ >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
300
+ 'emp_sage': [100, 200, 300, 400],
301
+ 'emp_id': [133, 144, 155, 177],
302
+ 'marks': [99.99, 97.32, 94.67, 91.00]
303
+ }
304
+
305
+ >>> pandas_df = pd.DataFrame(df)
306
+
307
+ a) Save a Pandas DataFrame using a dataframe & table name only:
308
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table')
309
+
310
+ b) Saving as a SET table
311
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_set_table', index=True,
312
+ primary_index='index_label', set_table=True)
313
+
314
+ c) Save a Pandas DataFrame by specifying additional parameters:
315
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table_2', schema_name = 'alice',
316
+ index = True, index_label = 'my_index_label', temporary = False,
317
+ primary_index = ['emp_id'], if_exists = 'append',
318
+ types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
319
+ 'emp_id': BIGINT, 'marks': DECIMAL})
320
+
321
+ d) Saving with additional parameters as a SET table
322
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
323
+ index = True, index_label = 'my_index_label', temporary = False,
324
+ primary_index = ['emp_id'], if_exists = 'append',
325
+ types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
326
+ 'emp_id': BIGINT, 'marks': DECIMAL},
327
+ set_table=True)
328
+
329
+ e) Saving levels in index of type MultiIndex
330
+ >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
331
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
332
+ index = True, index_label = ['index1', 'index2'], temporary = False,
333
+ primary_index = ['index1'], if_exists = 'replace')
334
+
335
+ 2. Saving a teradataml DataFrame:
336
+
337
+ >>> from teradataml.dataframe.dataframe import DataFrame
338
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
339
+ >>> from teradatasqlalchemy.types import *
340
+ >>> from teradataml.data.load_example_data import load_example_data
341
+
342
+ >>> # Load the data to run the example.
343
+ >>> load_example_data("glm", "admissions_train")
344
+
345
+ >>> # Create teradataml DataFrame(s)
346
+ >>> df = DataFrame('admissions_train')
347
+ >>> df2 = df.select(['gpa', 'masters'])
348
+
349
+ a) Save a teradataml DataFrame by using only a table name:
350
+ >>> df2.to_sql('my_tdml_table')
351
+
352
+ b) Save a teradataml DataFrame by using additional parameters:
353
+ >>> df2.to_sql(table_name = 'my_tdml_table', if_exists='append',
354
+ primary_index = ['gpa'], temporary=False, schema_name='alice')
355
+
356
+ c) Alternatively, save a teradataml DataFrame by using copy_to_sql:
357
+ >>> copy_to_sql(df2, 'my_tdml_table_2')
358
+
359
+ d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
360
+ >>> copy_to_sql(df = df2, table_name = 'my_tdml_table_3', schema_name = 'alice',
361
+ temporary = False, primary_index = None, if_exists = 'append',
362
+ types = {'masters': VARCHAR, 'gpa':INTEGER})
363
+
364
+ e) Saving as a SET table
365
+ >>> copy_to_sql(df = df2, table_name = 'my_tdml_set_table', schema_name = 'alice',
366
+ temporary = False, primary_index = ['gpa'], if_exists = 'append',
367
+ types = {'masters': VARCHAR, 'gpa':INTEGER}, set_table = True)
368
+
369
+ 3. Saving a teradataml DataFrame as a PTI table:
370
+
371
+ >>> from teradataml.dataframe.dataframe import DataFrame
372
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
373
+ >>> from teradataml.data.load_example_data import load_example_data
374
+
375
+ >>> load_example_data("sessionize", "sessionize_table")
376
+ >>> df3 = DataFrame('sessionize_table')
377
+
378
+ a) Using copy_to_sql
379
+ >>> copy_to_sql(df3, "test_copyto_pti",
380
+ timecode_column='clicktime',
381
+ columns_list='event')
382
+
383
+ b) Alternatively, using DataFrame.to_sql
384
+ >>> df3.to_sql(table_name = "test_copyto_pti_1",
385
+ timecode_column='clicktime',
386
+ columns_list='event')
387
+
388
+ c) Saving as a SET table
389
+ >>> copy_to_sql(df3, "test_copyto_pti_2",
390
+ timecode_column='clicktime',
391
+ columns_list='event',
392
+ set_table=True)
393
+
394
+ """
395
+ # Deriving global connection using get_connection().
396
+ con = get_connection()
397
+
398
+ try:
399
+ if con is None:
400
+ raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE), MessageCodes.CONNECTION_FAILURE)
401
+
402
+ # Check if the table to be created must be a Primary Time Index (PTI) table.
403
+ # If a user specifies the timecode_column parameter, an attempt to create
404
+ # a PTI will be made.
405
+ is_pti = False
406
+ if timecode_column is not None:
407
+ is_pti = True
408
+ if primary_index is not None:
409
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
410
+ 'primary_index',
411
+ 'timecode_column',
412
+ 'specified'), stacklevel=2)
413
+ else:
414
+ ignored = []
415
+ if timezero_date is not None: ignored.append('timezero_date')
416
+ if timebucket_duration is not None: ignored.append('timebucket_duration')
417
+ if sequence_column is not None: ignored.append('sequence_column')
418
+ if seq_max is not None: ignored.append('seq_max')
419
+ if columns_list is not None and (
420
+ not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
421
+ if primary_time_index_name is not None: ignored.append('primary_time_index_name')
422
+ if len(ignored) > 0:
423
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
424
+ ignored,
425
+ 'timecode_column',
426
+ 'missing'), stacklevel=2)
427
+
428
+ # Unset schema_name when temporary is True since volatile tables are always in the user database
429
+ if temporary is True:
430
+ if schema_name is not None:
431
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
432
+ 'schema_name',
433
+ 'temporary=True',
434
+ 'specified'), stacklevel=2)
435
+ schema_name = None
436
+
437
+ # Validate DataFrame & related flags; Proceed only when True
438
+ from teradataml.dataframe.data_transfer import _DataTransferUtils
439
+ dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name,
440
+ if_exists=if_exists, index=index, index_label=index_label,
441
+ primary_index=primary_index, temporary=temporary,
442
+ types=types, primary_time_index_name=primary_time_index_name,
443
+ timecode_column=timecode_column,
444
+ timebucket_duration=timebucket_duration,
445
+ timezero_date=timezero_date, columns_list=columns_list,
446
+ sequence_column=sequence_column, seq_max=seq_max,
447
+ set_table=set_table, api_name='copy_to',
448
+ chunksize=chunksize, match_column_order=match_column_order)
449
+
450
+ dt_obj._validate()
451
+
452
+ # If the table created must be a PTI table, then validate additional parameters
453
+ # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
454
+ # will be ignored - for example, primary_index
455
+ if is_pti:
456
+ _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
457
+ timezero_date, primary_time_index_name, columns_list,
458
+ sequence_column, seq_max, types, index, index_label)
459
+
460
+ # A table cannot be a SET table and have NO PRIMARY INDEX
461
+ if set_table and primary_index is None and timecode_column is None:
462
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_NO_PI),
463
+ MessageCodes.SET_TABLE_NO_PI)
464
+
465
+ # Check if destination table exists
466
+ table_exists = dt_obj._table_exists(con)
467
+
468
+ # Raise an exception when the table exists and if_exists = 'fail'
469
+ dt_obj._check_table_exists(is_table_exists=table_exists)
470
+
471
+ # Is the input DataFrame a Pandas DataFrame?
472
+ is_pandas_df = isinstance(df, pd.DataFrame)
473
+
474
+ # Let's also execute the node and set the table_name when df is teradataml DataFrame
475
+ if not is_pandas_df and df._table_name is None:
476
+ df._table_name = df_utils._execute_node_return_db_object_name(df._nodeid, df._metaexpr)
477
+
478
+ # Check table name conflict is present.
479
+ is_conflict = _check_table_name_conflict(df, table_name) if isinstance(df, tdmldf.DataFrame) and \
480
+ if_exists.lower() == 'replace' else False
481
+
482
+ # Create a temporary table name, When table name conflict is present.
483
+ if is_conflict:
484
+ # Store actual destination table name for later use.
485
+ dest_table_name = table_name
486
+ table_name = UtilFuncs._generate_temp_table_name(prefix=table_name,
487
+ table_type=TeradataConstants.TERADATA_TABLE,
488
+ quote=False)
489
+
490
+ # Let's create the SQLAlchemy table object to recreate the table
491
+ if not table_exists or if_exists.lower() == 'replace':
492
+ if not is_pti:
493
+ table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
494
+ types, None if not is_pandas_df else index,
495
+ None if not is_pandas_df else index_label)
496
+ else:
497
+ table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
498
+ primary_time_index_name, timecode_column, timezero_date,
499
+ timebucket_duration, sequence_column, seq_max,
500
+ columns_list, set_table, types,
501
+ None if not is_pandas_df else index,
502
+ None if not is_pandas_df else index_label)
503
+
504
+ if table is not None:
505
+ # If the table needs to be replaced and there is no table name conflict,
506
+ # let's drop the existing table first
507
+ if table_exists and not is_conflict:
508
+ tbl_name = dt_obj._get_fully_qualified_table_name()
509
+ UtilFuncs._drop_table(tbl_name)
510
+ try:
511
+ table.create(bind=get_context())
512
+ except sqlachemyOperationalError as err:
513
+ raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
514
+ '\n' + str(err),
515
+ MessageCodes.TABLE_OBJECT_CREATION_FAILED)
516
+ else:
517
+ raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
518
+ MessageCodes.TABLE_OBJECT_CREATION_FAILED)
519
+
520
+ # Check column compatibility for insertion when table exists and if_exists = 'append'
521
+ if table_exists and if_exists.lower() == 'append':
522
+ UtilFuncs._get_warnings('set_table', set_table, 'if_exists', 'append')
523
+
524
+ table = UtilFuncs._get_sqlalchemy_table(table_name,
525
+ schema_name=schema_name)
526
+
527
+ if table is not None:
528
+ # ELE-2284
529
+ # We are not considering types for 'append' mode as it is a simple insert and no casting is applied
530
+ if is_pandas_df:
531
+ cols = _extract_column_info(df, index=index, index_label=index_label)
532
+ else:
533
+ cols, _ = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
534
+ if match_column_order:
535
+ cols_compatible = _check_columns_insertion_compatible(table.c, cols, is_pandas_df,
536
+ is_pti, timecode_column, sequence_column)
537
+
538
+ if not cols_compatible:
539
+ raise TeradataMlException(Messages.get_message(MessageCodes.INSERTION_INCOMPATIBLE),
540
+ MessageCodes.INSERTION_INCOMPATIBLE)
541
+
542
+ # df is a Pandas DataFrame object
543
+ if isinstance(df, pd.DataFrame):
544
+ if not table_exists or if_exists.lower() == 'replace':
545
+ try:
546
+ # Support for saving Pandas index/Volatile is by manually inserting rows (batch) for now
547
+ if index or is_pti:
548
+ _insert_from_dataframe(df, con, schema_name, table_name, index,
549
+ chunksize, is_pti, timecode_column,
550
+ sequence_column, match_column_order)
551
+
552
+ # When index isn't saved & for non-PTI tables, to_sql insertion used (batch)
553
+ else:
554
+ # Empty queryband buffer before SQL call.
555
+ UtilFuncs._set_queryband()
556
+ df.to_sql(table_name, get_context(), if_exists='append', index=False, index_label=None,
557
+ chunksize=chunksize, schema=schema_name)
558
+
559
+ except sqlachemyOperationalError as err:
560
+ if "Duplicate row error" in str(err):
561
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_DUPICATE_ROW,
562
+ table_name),
563
+ MessageCodes.SET_TABLE_DUPICATE_ROW)
564
+ else:
565
+ raise
566
+
567
+ elif table_exists and if_exists.lower() == 'append':
568
+ _insert_from_dataframe(df, con, schema_name, table_name, index,
569
+ chunksize, is_pti, timecode_column,
570
+ sequence_column, match_column_order)
571
+
572
+ # df is a teradataml DataFrame object (to_sql wrapper used)
573
+ elif isinstance(df, tdmldf.DataFrame):
574
+ df_column_list = [col.name for col in df._metaexpr.c]
575
+
576
+ if is_pti:
577
+ # Reorder the column list to reposition the timecode and sequence columns
578
+ df_column_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
579
+
580
+ source_tbl_name = UtilFuncs._extract_table_name(df._table_name)
581
+ from_schema_name = UtilFuncs._extract_db_name(df._table_name)
582
+
583
+ df_utils._insert_all_from_table(table_name, source_tbl_name, df_column_list,
584
+ to_schema_name=schema_name,
585
+ from_schema_name=from_schema_name,
586
+ temporary=temporary)
587
+
588
+ # While table name conflict is present, Delete the source table after creation of temporary table.
589
+ # Rename the temporary table to destination table name.
590
+ if is_conflict and if_exists.lower() == 'replace':
591
+ tbl_name = dt_obj._get_fully_qualified_table_name()
592
+ UtilFuncs._drop_table(tbl_name)
593
+ _rename_table(table_name, dest_table_name)
594
+
595
+
596
+ except (TeradataMlException, ValueError, TypeError):
597
+ raise
598
+ except Exception as err:
599
+ raise TeradataMlException(Messages.get_message(MessageCodes.COPY_TO_SQL_FAIL) + str(err),
600
+ MessageCodes.COPY_TO_SQL_FAIL) from err
601
+
602
+
603
+def _check_table_name_conflict(df, table_name):
+    """
+    Check whether the destination "table_name" matches any of the teradataml
+    DataFrame's parent nodes. This function traverses the DAG from the child
+    node to the root node, checking for a table name conflict.
+
+    PARAMETERS:
+        df:
+            Required Argument.
+            Specifies the teradataml DataFrame object to be checked.
+            Types: teradataml.dataframe.dataframe.DataFrame
+
+        table_name:
+            Required Argument.
+            Specifies the name of the table to be created in Vantage.
+            Types: String
+
+    RETURNS:
+        A boolean value representing the presence of a conflict.
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        >>> df = DataFrame("sales")
+        >>> table_name = "destination_table"
+        >>> _check_table_name_conflict(df, table_name)
+    """
+    aed_obj = AedUtils()
+    # Check if the parent node count is greater than 0.
+    if aed_obj._aed_get_parent_node_count(df._nodeid) > 0:
+        # Check whether "table_name" matches any of the parent nodes' table names.
+        # Get the current table node id.
+        node_id = df._nodeid
+        while node_id:
+
+            # Get the parent node id using the current table node id.
+            parent_node_id = aed_obj._aed_get_parent_nodeids(node_id)
+
+            if parent_node_id:
+                # Check whether "table_name" matches the parent's table name.
+                # If it matches, return 'True'.
+                # Otherwise, traverse the graph from the current node toward the root node.
+                if table_name in aed_obj._aed_get_source_tablename(parent_node_id[0]):
+                    return True
+                else:
+                    node_id = parent_node_id[0]
+            else:
+                # When parent_node_id is empty, return 'False'.
+                return False
+    return False
+
+
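A sketch of when this check fires (hypothetical table and column names; running it requires a connected teradataml session):

    # base DataFrames have no parent nodes, so they never conflict;
    # derived DataFrames carry their source table in the DAG.
    #   base = DataFrame("sales")
    #   derived = base.select(["amount"])              # parent node reads "sales"
    #   _check_table_name_conflict(derived, "sales")   # True  -> save via temp table + rename
    #   _check_table_name_conflict(derived, "other")   # False -> safe to drop/recreate directly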
+def _get_sqlalchemy_table_from_tdmldf(df, meta):
+    """
+    This is an internal function used to generate an SQLAlchemy Table
+    object for the underlying table/view of a DataFrame.
+
+    PARAMETERS:
+        df:
+            The teradataml DataFrame to generate the SQLAlchemy.Table object for.
+
+        meta:
+            The SQLAlchemy MetaData object.
+
+    RETURNS:
+        SQLAlchemy.Table
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        >>> con = get_connection()
+        >>> df = DataFrame('admissions_train')
+        >>> meta = sqlalchemy.MetaData()
+        >>> table = _get_sqlalchemy_table_from_tdmldf(df, meta)
+
+    """
+    con = get_connection()
+    db_schema = UtilFuncs._extract_db_name(df._table_name)
+    db_table_name = UtilFuncs._extract_table_name(df._table_name)
+
+    return Table(db_table_name, meta, schema=db_schema, autoload_with=get_context())
+
+
+def _get_index_labels(df, index_label):
+    """
+    Internal function to construct a list of labels for the indices to be saved from a Pandas
+    DataFrame, based on user input and information from the DataFrame.
+
+    PARAMETERS:
+        df:
+            The Pandas input DataFrame.
+
+        index_label:
+            The user-provided label(s) for the indices.
+
+    RAISES:
+        None
+
+    RETURNS:
+        A tuple of two lists: the labels for the index column(s) to add as columns,
+        and the corresponding index level dtypes.
+
+    EXAMPLES:
+        _get_index_labels(df, index_label)
+    """
+    default_index_label = 'index_label'
+    default_level_prefix = 'level_'
+    level_cnt = 0
+
+    is_multi_index = isinstance(df.index, pd.MultiIndex)
+    ind_types = [level.dtype for level in df.index.levels] if is_multi_index else [df.index.dtype]
+
+    ind_names = []
+    if index_label:
+        ind_names = [index_label] if isinstance(index_label, str) else index_label
+    else:
+        for name in df.index.names:
+            if name not in ('', None):
+                ind_names.append(name)
+            else:
+                if is_multi_index:
+                    ind_names.append(default_level_prefix + str(level_cnt))
+                    level_cnt = level_cnt + 1
+                else:
+                    df_columns = _get_pd_df_column_names(df)
+                    label = default_level_prefix + str(level_cnt) if default_index_label in df_columns else default_index_label
+                    ind_names.append(label)
+
+    return ind_names, ind_types
+
+
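A short worked sketch of the label derivation above (assumes this module's namespace and the pandas versions teradataml supports):

    import pandas as pd

    # Unnamed MultiIndex levels fall back to 'level_0', 'level_1', ...
    midx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
    names, dtypes = _get_index_labels(pd.DataFrame({"x": [10, 20]}, index=midx), None)
    # names -> ['level_0', 'level_1']

    # A single unnamed index is labeled 'index_label', unless a column already
    # uses that name, in which case 'level_0' is used instead.
    names, _ = _get_index_labels(pd.DataFrame({"index_label": [1, 2]}), None)
    # names -> ['level_0']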
+def _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
+                                  timezero_date, primary_time_index_name, columns_list,
+                                  sequence_column, seq_max, types, index, index_label):
+    """
+    This is an internal function used to validate the PTI part of a copy request.
+    The DataFrame, connection, and related parameters are checked.
+    Saving to Vantage proceeds only when validation succeeds.
+
+    PARAMETERS:
+        df:
+            The DataFrame (Pandas or teradataml) object to be saved.
+
+        timecode_column:
+            The column in the DataFrame that reflects the form of the timestamp
+            data in the time series.
+            Type: String
+
+        timebucket_duration:
+            A duration that serves to break up the time continuum in
+            the time series data into discrete groups or buckets.
+            Type: String
+
+        timezero_date:
+            Specifies the earliest time series data that the PTI table will accept.
+            Type: String
+
+        primary_time_index_name:
+            A name for the Primary Time Index (PTI).
+            Type: String
+
+        columns_list:
+            A list of one or more PTI table column names.
+            Type: String or list of Strings
+
+        sequence_column:
+            Specifies a column of type Integer with sequences, implying that the
+            time series data readings are not unique.
+            If not specified, the time series data are assumed to be unique in time.
+            Type: String
+
+        seq_max:
+            Specifies the maximum number of sensor data rows that can have the
+            same timestamp. Can be used when 'sequenced' is True.
+            Accepted range: 1 - 2147483647.
+            Type: int
+
+        types:
+            Dictionary specifying column-name to teradatasqlalchemy type-mapping.
+
+        index:
+            Flag specifying whether to write the Pandas DataFrame index as a column or not.
+            Type: bool
+
+        index_label:
+            Column label for index column(s).
+            Type: String
+
+    RETURNS:
+        True, when all parameters are valid.
+
+    RAISES:
+        TeradataMlException, when parameter validation fails.
+
+    EXAMPLES:
+        _validate_pti_copy_parameters(df = my_df, timecode_column = 'ts', timebucket_duration = 'HOURS(2)')
+    """
+    if isinstance(df, pd.DataFrame):
+        df_columns = _get_pd_df_column_names(df)
+    else:
+        df_columns = [col.name for col in df._metaexpr.c]
+
+    awu = AnalyticsWrapperUtils()
+    awu_matrix = []
+
+    # The arguments added to awu_matrix are:
+    # arg_name, arg, is_optional, acceptable types.
+    # The value for is_optional is set to False when the argument
+    # a) is a required argument
+    # b) is not allowed to be None, even if it is optional
+    awu_matrix.append(['timecode_column', timecode_column, False, (str)])
+    awu_matrix.append(['columns_list', columns_list, True, (str, list)])
+    awu_matrix.append(['timezero_date', timezero_date, True, (str)])
+    awu_matrix.append(['timebucket_duration', timebucket_duration, True, (str)])
+    awu_matrix.append(['primary_time_index_name', primary_time_index_name, True, (str)])
+    awu_matrix.append(['sequence_column', sequence_column, True, (str)])
+    awu_matrix.append(['seq_max', seq_max, True, (int)])
+
+    # Validate types.
+    awu._validate_argument_types(awu_matrix)
+
+    # Validate that arguments are not empty.
+    awu._validate_input_columns_not_empty(timecode_column, 'timecode_column')
+    awu._validate_input_columns_not_empty(columns_list, 'columns_list')
+    awu._validate_input_columns_not_empty(timezero_date, 'timezero_date')
+    awu._validate_input_columns_not_empty(timebucket_duration, 'timebucket_duration')
+    awu._validate_input_columns_not_empty(sequence_column, 'sequence_column')
+
+    # Validate all the required arguments, and the optional arguments when not None.
+    # First the timecode_column.
+    _validate_column_in_list_of_columns('df', df_columns, timecode_column, 'timecode_column')
+    # Check the type of timecode_column.
+    _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES.value,
+                          types, index, index_label)
+
+    # timezero_date
+    _validate_timezero_date(timezero_date)
+
+    # timebucket_duration
+    _Validators._validate_timebucket_duration(timebucket_duration)
+
+    # Validate sequence_column.
+    if sequence_column is not None:
+        _validate_column_in_list_of_columns('df', df_columns, sequence_column, 'sequence_column')
+        # Check the type of sequence_column.
+        _validate_column_type(df, sequence_column, 'sequence_column',
+                              PTITableConstants.VALID_SEQUENCE_COL_DATATYPES.value, types, index, index_label)
+
+    # Validate seq_max.
+    if seq_max is not None and (seq_max < 1 or seq_max > 2147483647):
+        raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(seq_max, 'seq_max',
+                                                                                              '1 <= integer <= 2147483647'),
+                                  MessageCodes.INVALID_ARG_VALUE)
+
+    # Validate columns_list.
+    _validate_columns_list('df', df_columns, columns_list)
+    if isinstance(columns_list, str):
+        columns_list = [columns_list]
+
+    # Either one or both of timebucket_duration and columns_list must be specified.
+    if timebucket_duration is None and (columns_list is None or len(columns_list) == 0):
+        raise TeradataMlException(
+            Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, 'timebucket_duration', 'columns_list'),
+            MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
+
+
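The final rule above, in brief (a sketch with hypothetical values):

    # Either timebucket_duration or columns_list (or both) must be given:
    #   timebucket_duration='MINUTES(30)', columns_list=None    -> OK
    #   timebucket_duration=None, columns_list=['sensor_id']    -> OK
    #   timebucket_duration=None, columns_list=None             -> raises EITHER_THIS_OR_THAT_ARGUMENT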
+def _validate_columns_list(df, df_columns, columns_list):
+    """
+    Internal function to validate the columns list specified when creating a
+    Primary Time Index (PTI) table.
+
+    PARAMETERS:
+        df:
+            Name of the DataFrame to which the column being validated
+            does or should belong.
+
+        df_columns:
+            List of columns in the DataFrame.
+
+        columns_list:
+            The column or list of columns.
+            Type: String or list of Strings
+
+    RETURNS:
+        True if the column or list of columns is valid.
+
+    RAISES:
+        TeradataMlException on validation failure.
+    """
+    if columns_list is None:
+        return True
+
+    # Treat a single column name as a one-element list.
+    if isinstance(columns_list, str):
+        columns_list = [columns_list]
+
+    # Validate that the DataFrame has each of the columns.
+    for col in columns_list:
+        _validate_column_in_list_of_columns(df, df_columns, col, 'columns_list')
+
+    return True
+
+
+def _validate_column_in_list_of_columns(df, df_columns, col, col_arg):
+    """
+    Internal function to validate the arguments used to specify
+    a column name in a DataFrame.
+
+    PARAMETERS:
+        df:
+            Name of the DataFrame to which the column being validated
+            does or should belong.
+
+        df_columns:
+            List of columns in the DataFrame.
+
+        col:
+            Column to be validated.
+
+        col_arg:
+            Name of the argument used to specify the column name.
+
+    RETURNS:
+        True, if the column name is valid.
+
+    RAISES:
+        TeradataMlException if the column name is invalid.
+    """
+    if col not in df_columns:
+        raise TeradataMlException(
+            Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(col,
+                                                                                     col_arg,
+                                                                                     df,
+                                                                                     'DataFrame'),
+            MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND)
+
+    return True
+
+
+def _validate_column_type(df, col, col_arg, expected_types, types = None, index = False, index_label = None):
+    """
+    Internal function to validate the type of an input DataFrame column against
+    a list of expected types.
+
+    PARAMETERS:
+        df:
+            Input DataFrame (Pandas or teradataml) which has the column to be tested
+            for type.
+
+        col:
+            The column in the input DataFrame to be tested for type.
+
+        col_arg:
+            The name of the argument used to pass the column name.
+
+        expected_types:
+            Specifies a list of teradatasqlalchemy datatypes, one of which the column
+            is expected to be.
+
+        types:
+            Dictionary specifying column-name to teradatasqlalchemy type-mapping.
+
+        index:
+            Flag specifying whether the Pandas DataFrame index is written as a column or not.
+
+        index_label:
+            Column label(s) for the index column(s).
+
+    RETURNS:
+        True, when the column is of an expected type.
+
+    RAISES:
+        TeradataMlException, when the column is not one of the expected types.
+
+    EXAMPLES:
+        _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES, types)
+    """
+    # Check if the column is being cast to a valid type via the user-specified types mapping.
+    if types is not None and col in types:
+        if not any(isinstance(types[col], expected_type) for expected_type in expected_types):
+            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
+                                      format(col_arg, types[col], ' or '.join(expected_type.__visit_name__
+                                                                              for expected_type in expected_types)),
+                                      MessageCodes.INVALID_COLUMN_TYPE)
+    # Else we need to copy without any casting.
+    elif isinstance(df, pd.DataFrame):
+        t = _get_sqlalchemy_mapping_types(str(df.dtypes[col]))
+        if t not in expected_types:
+            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
+                                      format(col_arg, t, ' or '.join(expected_type.__visit_name__
+                                                                     for expected_type in expected_types)),
+                                      MessageCodes.INVALID_COLUMN_TYPE)
+    elif not any(isinstance(df[col].type, t) for t in expected_types):
+        raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
+                                  format(col_arg, df[col].type, ' or '.join(expected_type.__visit_name__
+                                                                            for expected_type in expected_types)),
+                                  MessageCodes.INVALID_COLUMN_TYPE)
+
+    return True
+
+
+def _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table, types, index=None,
+                         index_label=None):
+    """
+    This is an internal function used to construct a SQLAlchemy Table object.
+    This function checks the appropriate flags and supports creation of Teradata
+    specific Table constructs such as Volatile/Primary Index tables.
+
+
+    PARAMETERS:
+        df:
+            The teradataml or Pandas DataFrame object to be saved.
+
+        table_name:
+            Name of the SQL table.
+
+        con:
+            A SQLAlchemy connectable (engine/connection) object.
+
+        primary_index:
+            Creates the Teradata Table(s) with the specified Primary Index column(s), if any.
+
+        temporary:
+            Flag specifying whether the SQL table to be created is Volatile or not.
+
+        schema_name:
+            Specifies the name of the SQL schema in the database to write to.
+
+        set_table:
+            A flag specifying whether to create a SET table or a MULTISET table.
+            When True, an attempt to create a SET table is made.
+            When False, an attempt to create a MULTISET table is made.
+
+        types:
+            Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
+
+        index:
+            Flag specifying whether to write the Pandas DataFrame index as a column(s) or not.
+
+        index_label:
+            Column label(s) for index column(s).
+
+    RETURNS:
+        SQLAlchemy Table
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _create_table_object(df = my_df, table_name = 'test_table', con = tdconnection, primary_index = None,
+                             temporary = True, schema_name = schema, set_table = False, types = types,
+                             index = True, index_label = None)
+        _create_table_object(df = csv_filepath, table_name = 'test_table', con = tdconnection, primary_index = None,
+                             temporary = True, schema_name = schema, set_table = False, types = types,
+                             index = True, index_label = None)
+    """
+    # Dictionary to append special flags; can be extended to add Fallback, Journalling, Log etc.
+    post_params = {}
+    prefix = []
+    pti = post(opts=post_params)
+
+    if temporary is True:
+        pti = pti.on_commit(option='preserve')
+        prefix.append('VOLATILE')
+
+    if not set_table:
+        prefix.append('multiset')
+    else:
+        prefix.append('set')
+
+    meta = MetaData()
+    meta.bind = con
+
+    if isinstance(df, pd.DataFrame):
+        col_names, col_types = _extract_column_info(df, types, index, index_label)
+    elif isinstance(df, str):
+        col_names, col_types = _extract_column_info(df, types)
+    else:
+        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
+        if types is not None:
+            # Use the user-provided type when given; otherwise keep the default
+            # (this also handles partially specified types).
+            col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
+
+    if primary_index is not None:
+        if isinstance(primary_index, list):
+            pti = pti.primary_index(unique=False, cols=primary_index)
+        elif isinstance(primary_index, str):
+            pti = pti.primary_index(unique=False, cols=[primary_index])
+    else:
+        pti = pti.no_primary_index()
+
+    # Create the default Table construct with the parameter dictionary.
+    table = Table(table_name, meta,
+                  *(Column(col_name, col_type)
+                    for col_name, col_type in
+                    zip(col_names, col_types)),
+                  teradatasql_post_create=pti,
+                  prefixes=prefix,
+                  schema=schema_name
+                  )
+
+    return table
+
+
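To make the effect of the prefixes and post-create options concrete, here is a rough sketch of the DDL such a Table object renders; the exact rendering is up to teradatasqlalchemy, and the names below are hypothetical:

    # Hypothetical: temporary=True, set_table=False, primary_index='id',
    # one INTEGER column and one VARCHAR column.
    #
    #   CREATE MULTISET VOLATILE TABLE "mydb"."test_table" (
    #       "id" INTEGER,
    #       "name" VARCHAR(1024) CHAR SET UNICODE
    #   ) PRIMARY INDEX ("id")
    #   ON COMMIT PRESERVE ROWS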
+def _create_pti_table_object(df, con, table_name, schema_name, temporary, primary_time_index_name,
+                             timecode_column, timezero_date, timebucket_duration,
+                             sequence_column, seq_max, columns_list, set_table, types, index=None, index_label=None):
+    """
+    This is an internal function used to construct a SQLAlchemy Table object.
+    This function checks the appropriate flags and supports creation of Teradata
+    specific Table constructs such as Volatile and Primary Time Index tables.
+
+    PARAMETERS:
+        df:
+            The teradataml or Pandas DataFrame object to be saved.
+
+        con:
+            A SQLAlchemy connectable (engine/connection) object.
+
+        table_name:
+            Name of the SQL table.
+
+        schema_name:
+            Specifies the name of the SQL schema in the database to write to.
+
+        temporary:
+            Flag specifying whether the SQL table to be created is Volatile or not.
+
+        primary_time_index_name:
+            A name for the Primary Time Index (PTI).
+
+        timecode_column:
+            The column in the DataFrame that reflects the form of the timestamp
+            data in the time series.
+
+        timezero_date:
+            Specifies the earliest time series data that the PTI table will accept.
+
+        timebucket_duration:
+            A duration that serves to break up the time continuum in
+            the time series data into discrete groups or buckets.
+
+        sequence_column:
+            Specifies a column with sequences, implying that the time series data
+            readings are not unique. If not specified, the time series data are
+            assumed to be unique.
+
+        seq_max:
+            Specifies the maximum number of sensor data rows that can have the
+            same timestamp. Can be used when 'sequenced' is True.
+
+        columns_list:
+            A list of one or more PTI table column names.
+
+        set_table:
+            A flag specifying whether to create a SET table or a MULTISET table.
+            When True, an attempt to create a SET table is made.
+            When False, an attempt to create a MULTISET table is made.
+
+        types:
+            Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
+
+        index:
+            Flag specifying whether to write the Pandas DataFrame index as a column or not.
+
+        index_label:
+            Column label for index column(s).
+
+    RETURNS:
+        SQLAlchemy Table
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _create_pti_table_object(df = my_df, table_name = 'test_table', con = tdconnection,
+                                 timecode_column = 'ts', columns_list = ['user_id', 'location'])
+
+    """
+    meta = MetaData()
+
+    if isinstance(df, pd.DataFrame):
+        col_names, col_types = _extract_column_info(df, types, index, index_label)
+        timecode_datatype = col_types[col_names.index(timecode_column)]()
+    else:
+        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
+        if types is not None:
+            # Use the user-provided type when given; otherwise keep the default
+            # (this also handles partially specified types).
+            col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
+        timecode_datatype = df[timecode_column].type
+
+    # Remove the timecode and sequence columns from col_names and col_types,
+    # since the required columns will be created automatically.
+    if timecode_column in col_names:
+        ind = col_names.index(timecode_column)
+        col_names.pop(ind)
+        col_types.pop(ind)
+
+    if sequence_column is not None and sequence_column in col_names:
+        ind = col_names.index(sequence_column)
+        col_names.pop(ind)
+        col_types.pop(ind)
+
+    # Dictionary to append special flags; can be extended to add Fallback, Journalling, Log etc.
+    post_params = {}
+    prefix = []
+    pti = post(opts=post_params)
+
+    # Create the Table object with the appropriate Primary Time Index/prefix for Volatile.
+    if temporary:
+        pti = pti.on_commit(option='preserve')
+        prefix.append('VOLATILE')
+
+    if not set_table:
+        prefix.append('multiset')
+    else:
+        prefix.append('set')
+
+    pti = pti.primary_time_index(timecode_datatype,
+                                 name=primary_time_index_name,
+                                 timezero_date=timezero_date,
+                                 timebucket_duration=timebucket_duration,
+                                 sequenced=True if sequence_column is not None else False,
+                                 seq_max=seq_max,
+                                 cols=columns_list)
+
+    table = Table(table_name, meta,
+                  *(Column(col_name, col_type)
+                    for col_name, col_type in
+                    zip(col_names, col_types)),
+                  teradatasql_post_create=pti,
+                  prefixes=prefix,
+                  schema=schema_name
+                  )
+
+    return table
+
+
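Likewise, a rough, hedged sketch of how the PTI post-create options surface in DDL (hypothetical names; the exact rendering is up to teradatasqlalchemy):

    # Hypothetical: timecode_column='ts' (TIMESTAMP(6)), sequence_column='seq',
    # timezero_date="DATE '2011-01-01'", timebucket_duration='HOURS(1)', seq_max=100.
    #
    #   CREATE MULTISET TABLE "mydb"."sensor_pti" (
    #       "sensor_id" INTEGER,
    #       "reading" FLOAT
    #   ) PRIMARY TIME INDEX (TIMESTAMP(6), DATE '2011-01-01', HOURS(1),
    #                         COLUMNS("sensor_id"), SEQUENCED(100))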
+def _rename_column(col_names, search_for, rename_to):
+    """
+    Internal function to rename a column in a list of columns of a Pandas DataFrame.
+
+    PARAMETERS:
+        col_names:
+            Required Argument.
+            The list of column names of the Pandas DataFrame.
+
+        search_for:
+            Required Argument.
+            The column name that needs to be renamed.
+
+        rename_to:
+            Required Argument.
+            The column name to replace the 'search_for' column with.
+
+    RETURNS:
+        The list of column names, with 'search_for' replaced by 'rename_to'.
+
+    EXAMPLES:
+        cols = _rename_column(cols, 'col_1', 'new_col_1')
+    """
+    ind = col_names.index(search_for)
+    col_names.pop(ind)
+    col_names.insert(ind, rename_to)
+
+    return col_names
+
+
+def _rename_to_pti_columns(col_names, timecode_column, sequence_column,
+                           timecode_column_index=None, sequence_column_index=None):
+    """
+    Internal function to generate a list of renamed columns of a Pandas DataFrame
+    to match the PTI table column names in Vantage, or to revert any such changes.
+
+    PARAMETERS:
+        col_names:
+            The list of column names of the Pandas DataFrame.
+
+        timecode_column:
+            The column name that reflects the timecode column in the PTI table.
+
+        sequence_column:
+            The column name that reflects the sequence column in the PTI table.
+
+        timecode_column_index:
+            The index of the timecode column. When specified, it indicates that a
+            reverse renaming operation is to be performed.
+
+        sequence_column_index:
+            The index of the sequence column. When specified, it indicates that a
+            reverse renaming operation is to be performed.
+
+    RETURNS:
+        A list of columns with the PTI-related columns renamed.
+
+    EXAMPLES:
+        cols = _rename_to_pti_columns(cols, timecode_column, sequence_column,
+                                      timecode_column_index=t_idx, sequence_column_index=s_idx)
+        cols = _rename_to_pti_columns(cols, timecode_column, sequence_column)
+    """
+    # Rename the timecode_column to/from its name in Vantage.
+    if timecode_column_index is not None:
+        col_names = _rename_column(col_names, PTITableConstants.TD_TIMECODE.value, timecode_column)
+    else:
+        col_names = _rename_column(col_names, timecode_column, PTITableConstants.TD_TIMECODE.value)
+
+    # Rename the sequence_column to/from its name in Vantage.
+    if sequence_column is not None:
+        if sequence_column_index is not None:
+            col_names = _rename_column(col_names, PTITableConstants.TD_SEQNO.value, sequence_column)
+        else:
+            col_names = _rename_column(col_names, sequence_column, PTITableConstants.TD_SEQNO.value)
+
+    return col_names
+
+
+def _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column, df_col_type_list = None):
+    """
+    Internal function to reorder the list of columns used to construct the 'INSERT INTO'
+    statement as required when the target table is a PTI table.
+
+    PARAMETERS:
+        df_column_list:
+            A list of column names for the columns in the DataFrame.
+
+        timecode_column:
+            The timecode column, which should be moved to the first position.
+
+        sequence_column:
+            The sequence column, which should be moved to the second position.
+
+        df_col_type_list:
+            Optionally, a list containing the types of the columns, to be reordered
+            to match the reordering of df_column_list.
+
+    RETURNS:
+        A reordered list of column names for the columns in the DataFrame.
+        If the optional types list is also specified, then a tuple of the reordered
+        list of column names and the reordered list of column types.
+
+    EXAMPLE:
+        new_colname_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
+        new_colname_list, new_type_list = _reorder_insert_list_for_pti(df_column_list, timecode_column,
+                                                                       sequence_column, df_col_type_list)
+    """
+    # Reposition the timecode column (to the first position) and the sequence
+    # column (to the second position) in df_column_list.
+    timecode_column_index = df_column_list.index(timecode_column)
+    df_column_list.insert(0, df_column_list.pop(timecode_column_index))
+    if df_col_type_list is not None:
+        df_col_type_list.insert(0, df_col_type_list.pop(timecode_column_index))
+
+    if sequence_column is not None:
+        sequence_column_index = df_column_list.index(sequence_column)
+        df_column_list.insert(1, df_column_list.pop(sequence_column_index))
+        if df_col_type_list is not None:
+            # Insert at position 1 (not 0) so the types stay aligned with the
+            # column names, where the sequence column is second.
+            df_col_type_list.insert(1, df_col_type_list.pop(sequence_column_index))
+
+    if df_col_type_list is not None:
+        return df_column_list, df_col_type_list
+    else:
+        return df_column_list
+
+
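A small worked example of the reordering (runnable against the function above; the column names are hypothetical):

    cols = ['sensor_id', 'ts', 'reading', 'seq']
    cols = _reorder_insert_list_for_pti(cols, timecode_column='ts', sequence_column='seq')
    # Timecode first, sequence second, remaining order preserved:
    # cols -> ['ts', 'seq', 'sensor_id', 'reading']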
+def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_pandas_df=False,
+                                        is_pti=False, timecode_column=None, sequence_column=None):
+    """
+    Internal function used to extract column information from two lists of SQLAlchemy
+    ColumnExpression objects and check whether the number of columns and their names
+    match, to determine table insertion compatibility.
+
+    PARAMETERS:
+        table1_col_object:
+            Specifies a list/collection of SQLAlchemy ColumnExpression objects for the first table.
+
+        table2_cols:
+            Specifies a list of column names for the second table (teradataml DataFrame).
+
+        is_pandas_df:
+            Flag specifying whether the table objects to check come from a pandas DataFrame or not.
+            Default: False
+            Note: When this flag is True, table2_cols is passed as a tuple object of
+                  ([column_names], [column_types]).
+
+        is_pti:
+            Boolean flag indicating if the target table is a PTI table.
+
+        timecode_column:
+            The timecode column, required to order the select expression for the insert.
+            It should be the first column in the select expression.
+
+        sequence_column:
+            The sequence column, required to order the select expression for the insert.
+            It should be the second column in the select expression.
+
+    RETURNS:
+        a) True, when insertion compatible (the number of columns and their names match)
+        b) False, otherwise
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _check_columns_insertion_compatible(table1.c, ['co1', 'col2'], False)
+        _check_columns_insertion_compatible(table1.c, (['co1', 'col2'], [int, str]), True, True, 'ts', 'seq')
+
+    """
+    table1_col_names, _ = UtilFuncs._extract_table_object_column_info(table1_col_object)
+    table2_col_names = table2_cols[0] if is_pandas_df else table2_cols
+
+    # Check the number of columns.
+    if len(table1_col_names) != len(table2_col_names):
+        return False
+
+    if is_pti is True:
+        # Reposition the timecode column (to the first position) and the sequence column
+        # (to the second position), with their names as generated by the database, in
+        # table2_col_names, since that is the default position of those columns.
+        table2_col_names = _reorder_insert_list_for_pti(table2_col_names, timecode_column, sequence_column)
+        table2_col_names = _rename_to_pti_columns(table2_col_names, timecode_column, sequence_column)
+
+    # Check the column names.
+    for i in range(len(table1_col_names)):
+        if table1_col_names[i] != table2_col_names[i]:
+            return False
+
+    # The number of columns and their names in both lists of ColumnExpressions match.
+    return True
+
+
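For a PTI append from pandas, the check above effectively compares the following (a sketch with hypothetical names):

    # target table columns: ['TD_TIMECODE', 'TD_SEQNO', 'sensor_id', 'reading']
    # pandas df columns:    ['sensor_id', 'ts', 'reading', 'seq']
    #   reorder -> ['ts', 'seq', 'sensor_id', 'reading']
    #   rename  -> ['TD_TIMECODE', 'TD_SEQNO', 'sensor_id', 'reading']   => compatible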
+def _extract_column_info(df, types = None, index = False, index_label = None):
+    """
+    This is an internal function used to extract column information from a DF,
+    and map it to user-specified teradatasqlalchemy types, if specified,
+    for Table creation.
+
+    PARAMETERS:
+        df:
+            The Pandas DataFrame object to be saved.
+
+        types:
+            A python dictionary with column names and required types as key-value pairs.
+
+        index:
+            Flag specifying whether to write the Pandas DataFrame index as a column(s) or not.
+
+        index_label:
+            Column label(s) for index column(s).
+
+    RETURNS:
+        A tuple with the following elements:
+        a) List of DataFrame column names
+        b) List of equivalent teradatasqlalchemy column types
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        _extract_column_info(df = my_df)
+        _extract_column_info(df = my_df, types = {'id_col': INTEGER})
+
+    """
+    if isinstance(df, str):
+        return list(types.keys()), list(types.values())
+
+    col_names = _get_pd_df_column_names(df)
+
+    # If a type is not specified for a column, map timezone-aware datetime64 columns to
+    # TIMESTAMP(timezone=True); otherwise fall back to the default type mapping.
+    col_types = [types.get(col_name) if types and col_name in types else
+                 TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
+                                             and (df[col_name].dt.tz is not None)
+                 else _get_sqlalchemy_mapping_types(str(df.dtypes[key]))
+                 for key, col_name in enumerate(list(df.columns))]
+
+    ind_names = []
+    ind_types = []
+    if index:
+        ind_names, ind_types = _get_index_labels(df, index_label)
+        ind_types = [types.get(ind_name) if types and ind_name in types
+                     else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
+                                                      and (df[ind_name].dt.tz is not None)
+                     else _get_sqlalchemy_mapping_types(str(ind_types[key]))
+                     for key, ind_name in enumerate(ind_names)]
+
+    return col_names + ind_names, col_types + ind_types
+
+
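A minimal sketch of the default dtype mapping (assumes this module's namespace and the pandas versions teradataml supports; the VARCHAR length comes from configure.default_varchar_size):

    import pandas as pd

    pdf = pd.DataFrame({"a": [1, 2], "b": [0.5, 1.5], "c": ["x", "y"]})
    names, td_types = _extract_column_info(pdf)
    # names    -> ['a', 'b', 'c']
    # td_types -> roughly [BIGINT, FLOAT, VARCHAR(configure.default_varchar_size) CHAR SET UNICODE]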
+def _insert_from_dataframe(df, con, schema_name, table_name, index, chunksize,
+                           is_pti=False, timecode_column=None, sequence_column=None,
+                           match_column_order=True):
+    """
+    This is an internal function used to sequentially extract column info from a DF,
+    iterate over its rows, and insert the rows manually.
+    Used for insertions into Temporary Tables and tables with a Pandas index.
+
+    This uses the DBAPI's executemany(), which is a batch insertion method.
+
+    PARAMETERS:
+        df:
+            The Pandas DataFrame object to be saved.
+
+        con:
+            A SQLAlchemy connectable (engine/connection) object.
+
+        schema_name:
+            Name of the schema.
+
+        table_name:
+            Name of the table.
+
+        index:
+            Flag specifying whether to write the Pandas DataFrame index as a column or not.
+
+        chunksize:
+            Specifies the number of rows to be loaded in a batch.
+            Note:
+                This argument is used only when argument "df" is a pandas DataFrame.
+
+        is_pti:
+            Boolean flag indicating if the table should be a PTI table.
+
+        timecode_column:
+            The timecode column, required to order the select expression for the insert.
+            It should be the first column in the select expression.
+
+        sequence_column:
+            The sequence column, required to order the select expression for the insert.
+            It should be the second column in the select expression.
+
+        match_column_order:
+            Specifies whether the column order of the df to be loaded matches the
+            order of the existing table or not.
+
+    RETURNS:
+        N/A
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _insert_from_dataframe(df = my_df, con = tdconnection, schema = None, table_name = 'test_table',
+                               index = True, index_label = None)
+    """
+    col_names = _get_pd_df_column_names(df)
+
+    # Quoted, schema-qualified table name.
+    table = '"{}"'.format(table_name)
+    if schema_name is not None:
+        table = '"{}"."{}"'.format(schema_name, table_name)
+
+    try:
+
+        if is_pti:
+            # This is for the non-index columns.
+            col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)
+
+        is_multi_index = isinstance(df.index, pd.MultiIndex)
+
+        insert_list = []
+
+        if not match_column_order:
+            ins = "INSERT INTO {} {} VALUES {};".format(
+                table,
+                '(' + ', '.join(col_names) + ')',
+                '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
+                                                    if index is True else len(col_names))]) + ')')
+        else:
+            ins = "INSERT INTO {} VALUES {};".format(
+                table,
+                '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
+                                                    if index is True else len(col_names))]) + ')')
+
+        # Empty the queryband buffer before the SQL call.
+        UtilFuncs._set_queryband()
+        rowcount = 0
+        # Iterate over the rows of the DataFrame using the reordered columns.
+        for row_index, row in enumerate(df[col_names].itertuples(index=True)):
+            ins_dict = ()
+            for col_index, x in enumerate(col_names):
+                ins_dict = ins_dict + (row[col_index + 1],)
+
+            if index is True:
+                ins_dict = ins_dict + row[0] if is_multi_index else ins_dict + (row[0],)
+
+            insert_list.append(ins_dict)
+            rowcount = rowcount + 1
+
+            # 'chunksize' is the batch size for the DBAPI driver.
+            # Insert the accumulated rows once the batch reaches that size.
+            if rowcount == chunksize:
+                # Batch insertion (using the DBAPI's executemany) is used here to insert
+                # the accumulated list of parameter tuples.
+                cur = execute_sql(ins, insert_list)
+                if cur is not None:
+                    cur.close()
+                rowcount = 0
+                insert_list.clear()
+
+        # Insert any remaining rows.
+        if rowcount > 0:
+            cur = execute_sql(ins, insert_list)
+            if cur is not None:
+                cur.close()
+
+    except Exception:
+        raise
+
+
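The shape of the generated statement, sketched for a hypothetical three-column table with the index saved and match_column_order=True:

    # INSERT INTO "myschema"."my_tab" VALUES (?, ?, ?, ?);
    #
    # Each parameter tuple is (col1, col2, col3, index_value); the tuples are
    # sent in chunks of 'chunksize' rows through execute_sql / DBAPI executemany.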
+def _get_pd_df_column_names(df):
+    """
+    Internal function to return the names of the columns in a Pandas DataFrame.
+
+    PARAMETERS:
+        df:
+            The Pandas DataFrame to fetch the column names for.
+
+    RETURNS:
+        A list of Strings.
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        _get_pd_df_column_names(df = my_df)
+    """
+    return df.columns.tolist()
+
+
+def _get_sqlalchemy_mapping(key):
+    """
+    This is an internal function used to return a SQLAlchemy type mapping
+    for a given Pandas DataFrame column type.
+    Used internally for Table object creation based on DF column info.
+
+    For an unknown key, String (mapping to VARCHAR) is returned.
+
+    PARAMETERS:
+        key : String representing the Pandas type ('int64', 'object' etc.)
+
+    RETURNS:
+        SQLAlchemy Type Object (Integer, String, Float, DateTime etc.)
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _get_sqlalchemy_mapping(key = 'int64')
+    """
+    teradata_types_map = _get_all_sqlalchemy_mappings()
+
+    if key in teradata_types_map.keys():
+        return teradata_types_map.get(key)
+    else:
+        return VARCHAR(configure.default_varchar_size, charset='UNICODE')
+
+
+def _get_all_sqlalchemy_mappings():
+    """
+    This is an internal function used to return a dictionary of all SQLAlchemy type mappings.
+    It contains mappings from pandas data types to objects of SQLAlchemy types.
+
+    PARAMETERS:
+        None
+
+    RETURNS:
+        dictionary { pandas_type : SQLAlchemy Type Object }
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _get_all_sqlalchemy_mappings()
+    """
+    teradata_types_map = {'int32': INTEGER(), 'int64': BIGINT(), 'Int64': INTEGER(),
+                          'object': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
+                          'O': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
+                          'float64': FLOAT(), 'float32': FLOAT(), 'bool': BYTEINT(),
+                          'datetime64': TIMESTAMP(), 'datetime64[ns]': TIMESTAMP(),
+                          'datetime64[ns, UTC]': TIMESTAMP(timezone=True),
+                          'timedelta64[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
+                          'timedelta[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE')}
+
+    return teradata_types_map
+
+
+def _get_sqlalchemy_mapping_types(key):
+    """
+    This is an internal function used to return a SQLAlchemy type mapping
+    for a given Pandas DataFrame column type.
+    Used internally for Table object creation based on DF column info.
+
+    For an unknown key, String (mapping to VARCHAR) is returned.
+
+    PARAMETERS:
+        key : String representing the Pandas type ('int64', 'object' etc.)
+
+    RETURNS:
+        SQLAlchemy Type (Integer, String, Float, DateTime etc.)
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _get_sqlalchemy_mapping_types(key = 'int64')
+    """
+    teradata_types_map = _get_all_sqlalchemy_types_mapping()
+
+    if key in teradata_types_map.keys():
+        return teradata_types_map.get(key)
+    else:
+        return VARCHAR(configure.default_varchar_size, charset='UNICODE')
+
+
+def _get_all_sqlalchemy_types_mapping():
+    """
+    This is an internal function used to return a dictionary of all SQLAlchemy type mappings.
+    It contains mappings from pandas data types to SQLAlchemy types (classes rather than
+    instances, except where a parameterized instance is required).
+
+    PARAMETERS:
+        None
+
+    RETURNS:
+        dictionary { pandas_type : SQLAlchemy Type }
+
+    RAISES:
+        N/A
+
+    EXAMPLES:
+        _get_all_sqlalchemy_types_mapping()
+    """
+    teradata_types_map = {'int32': INTEGER, 'int64': BIGINT,
+                          'object': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
+                          'O': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
+                          'float64': FLOAT, 'float32': FLOAT, 'bool': BYTEINT,
+                          'datetime64': TIMESTAMP, 'datetime64[ns]': TIMESTAMP,
+                          'datetime64[ns, UTC]': TIMESTAMP(timezone=True),
+                          'timedelta64[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
+                          'timedelta[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE')}
+
+    return teradata_types_map
+
+
+def _validate_timezero_date(timezero_date):
+    """
+    Internal function to validate the timezero_date specified when creating a
+    Primary Time Index (PTI) table.
+
+    PARAMETERS:
+        timezero_date:
+            The timezero_date passed to primary_time_index().
+
+    RETURNS:
+        True if the value is valid.
+
+    RAISES:
+        TeradataMlException when the value is invalid.
+
+    EXAMPLE:
+        _validate_timezero_date("DATE '2011-01-01'")
+        _validate_timezero_date('2011-01-01') # Invalid
+    """
+    # Return True if it is not specified or is None, since it is optional.
+    if timezero_date is None:
+        return True
+
+    pattern = re.compile(PTITableConstants.PATTERN_TIMEZERO_DATE.value)
+    match = pattern.match(timezero_date)
+
+    err_msg = Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(timezero_date,
+                                                                          'timezero_date',
+                                                                          "str of format DATE 'YYYY-MM-DD'")
+
+    try:
+        datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
+    except (ValueError, AttributeError):
+        raise TeradataMlException(err_msg,
+                                  MessageCodes.INVALID_ARG_VALUE)
+
+    # Looks like the value is valid.
+    return True
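
Behavior of the check above, sketched:

    # _validate_timezero_date("DATE '2011-01-01'")   # True
    # _validate_timezero_date(None)                  # True (the argument is optional)
    # _validate_timezero_date("2011-01-01")          # raises TeradataMlException (bad format)
    # _validate_timezero_date("DATE '2011-13-40'")   # raises TeradataMlException (not a real date)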