teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
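
Among the 1,285 changed files, the headline addition is the new teradataml/automl/ package (items 28–35 in the list below), which introduces automated feature engineering, model training, and evaluation. The sketch below shows, under stated assumptions, how that module is exposed: the AutoML class, its task_type argument, and the fit/predict/leaderboard methods follow the 20.x documentation, but exact signatures should be confirmed against the official release notes, and the connection values are placeholders.

```python
# Minimal sketch of the new AutoML workflow (teradataml/automl/), assuming the
# entry points documented for teradataml 20.x; connection values are placeholders.
from teradataml import create_context, DataFrame, load_example_data, AutoML

create_context(host="<host>", username="<user>", password="<password>")

load_example_data("teradataml", "titanic")   # demo dataset bundled with the package
df = DataFrame("titanic")

aml = AutoML(task_type="Classification")     # "Regression" is also supported
aml.fit(df, df.survived)                     # target passed as a DataFrame column
predictions = aml.predict(df)
print(aml.leaderboard())                     # ranked summary of the models AutoML trained
```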

This release of teradataml has been flagged as potentially problematic.

Files changed (1285)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1864 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2013 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +804 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1628 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +993 -0
  31. teradataml/automl/data_transformation.py +727 -0
  32. teradataml/automl/feature_engineering.py +1648 -0
  33. teradataml/automl/feature_exploration.py +547 -0
  34. teradataml/automl/model_evaluation.py +163 -0
  35. teradataml/automl/model_training.py +887 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/pkce_client.py +481 -481
  41. teradataml/common/aed_utils.py +6 -2
  42. teradataml/common/bulk_exposed_utils.py +111 -111
  43. teradataml/common/constants.py +1433 -1441
  44. teradataml/common/deprecations.py +160 -0
  45. teradataml/common/exceptions.py +73 -73
  46. teradataml/common/formula.py +742 -742
  47. teradataml/common/garbagecollector.py +592 -635
  48. teradataml/common/messagecodes.py +422 -431
  49. teradataml/common/messages.py +227 -231
  50. teradataml/common/sqlbundle.py +693 -693
  51. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  52. teradataml/common/utils.py +2418 -2500
  53. teradataml/common/warnings.py +25 -25
  54. teradataml/common/wrapper_utils.py +1 -110
  55. teradataml/config/dummy_file1.cfg +4 -4
  56. teradataml/config/dummy_file2.cfg +2 -2
  57. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  58. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  59. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  60. teradataml/context/aed_context.py +217 -217
  61. teradataml/context/context.py +1071 -999
  62. teradataml/data/A_loan.csv +19 -19
  63. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  64. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  65. teradataml/data/B_loan.csv +49 -49
  66. teradataml/data/BuoyData2.csv +17 -17
  67. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  68. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  69. teradataml/data/Convolve2RealsLeft.csv +5 -5
  70. teradataml/data/Convolve2RealsRight.csv +5 -5
  71. teradataml/data/Convolve2ValidLeft.csv +11 -11
  72. teradataml/data/Convolve2ValidRight.csv +11 -11
  73. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  74. teradataml/data/Orders1_12mf.csv +24 -24
  75. teradataml/data/Pi_loan.csv +7 -7
  76. teradataml/data/SMOOTHED_DATA.csv +7 -7
  77. teradataml/data/TestDFFT8.csv +9 -9
  78. teradataml/data/TestRiver.csv +109 -109
  79. teradataml/data/Traindata.csv +28 -28
  80. teradataml/data/acf.csv +17 -17
  81. teradataml/data/adaboost_example.json +34 -34
  82. teradataml/data/adaboostpredict_example.json +24 -24
  83. teradataml/data/additional_table.csv +10 -10
  84. teradataml/data/admissions_test.csv +21 -21
  85. teradataml/data/admissions_train.csv +41 -41
  86. teradataml/data/admissions_train_nulls.csv +41 -41
  87. teradataml/data/ageandheight.csv +13 -13
  88. teradataml/data/ageandpressure.csv +31 -31
  89. teradataml/data/antiselect_example.json +36 -36
  90. teradataml/data/antiselect_input.csv +8 -8
  91. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  92. teradataml/data/applicant_external.csv +6 -6
  93. teradataml/data/applicant_reference.csv +6 -6
  94. teradataml/data/arima_example.json +9 -9
  95. teradataml/data/assortedtext_input.csv +8 -8
  96. teradataml/data/attribution_example.json +33 -33
  97. teradataml/data/attribution_sample_table.csv +27 -27
  98. teradataml/data/attribution_sample_table1.csv +6 -6
  99. teradataml/data/attribution_sample_table2.csv +11 -11
  100. teradataml/data/bank_churn.csv +10001 -0
  101. teradataml/data/bank_web_clicks1.csv +42 -42
  102. teradataml/data/bank_web_clicks2.csv +91 -91
  103. teradataml/data/bank_web_url.csv +85 -85
  104. teradataml/data/barrier.csv +2 -2
  105. teradataml/data/barrier_new.csv +3 -3
  106. teradataml/data/betweenness_example.json +13 -13
  107. teradataml/data/bin_breaks.csv +8 -8
  108. teradataml/data/bin_fit_ip.csv +3 -3
  109. teradataml/data/binary_complex_left.csv +11 -11
  110. teradataml/data/binary_complex_right.csv +11 -11
  111. teradataml/data/binary_matrix_complex_left.csv +21 -21
  112. teradataml/data/binary_matrix_complex_right.csv +21 -21
  113. teradataml/data/binary_matrix_real_left.csv +21 -21
  114. teradataml/data/binary_matrix_real_right.csv +21 -21
  115. teradataml/data/blood2ageandweight.csv +26 -26
  116. teradataml/data/bmi.csv +501 -0
  117. teradataml/data/boston.csv +507 -507
  118. teradataml/data/buoydata_mix.csv +11 -11
  119. teradataml/data/burst_data.csv +5 -5
  120. teradataml/data/burst_example.json +20 -20
  121. teradataml/data/byom_example.json +17 -17
  122. teradataml/data/bytes_table.csv +3 -3
  123. teradataml/data/cal_housing_ex_raw.csv +70 -70
  124. teradataml/data/callers.csv +7 -7
  125. teradataml/data/calls.csv +10 -10
  126. teradataml/data/cars_hist.csv +33 -33
  127. teradataml/data/cat_table.csv +24 -24
  128. teradataml/data/ccm_example.json +31 -31
  129. teradataml/data/ccm_input.csv +91 -91
  130. teradataml/data/ccm_input2.csv +13 -13
  131. teradataml/data/ccmexample.csv +101 -101
  132. teradataml/data/ccmprepare_example.json +8 -8
  133. teradataml/data/ccmprepare_input.csv +91 -91
  134. teradataml/data/cfilter_example.json +12 -12
  135. teradataml/data/changepointdetection_example.json +18 -18
  136. teradataml/data/changepointdetectionrt_example.json +8 -8
  137. teradataml/data/chi_sq.csv +2 -2
  138. teradataml/data/churn_data.csv +14 -14
  139. teradataml/data/churn_emission.csv +35 -35
  140. teradataml/data/churn_initial.csv +3 -3
  141. teradataml/data/churn_state_transition.csv +5 -5
  142. teradataml/data/citedges_2.csv +745 -745
  143. teradataml/data/citvertices_2.csv +1210 -1210
  144. teradataml/data/clicks2.csv +16 -16
  145. teradataml/data/clickstream.csv +12 -12
  146. teradataml/data/clickstream1.csv +11 -11
  147. teradataml/data/closeness_example.json +15 -15
  148. teradataml/data/complaints.csv +21 -21
  149. teradataml/data/complaints_mini.csv +3 -3
  150. teradataml/data/complaints_testtoken.csv +224 -224
  151. teradataml/data/complaints_tokens_test.csv +353 -353
  152. teradataml/data/complaints_traintoken.csv +472 -472
  153. teradataml/data/computers_category.csv +1001 -1001
  154. teradataml/data/computers_test1.csv +1252 -1252
  155. teradataml/data/computers_train1.csv +5009 -5009
  156. teradataml/data/computers_train1_clustered.csv +5009 -5009
  157. teradataml/data/confusionmatrix_example.json +9 -9
  158. teradataml/data/conversion_event_table.csv +3 -3
  159. teradataml/data/corr_input.csv +17 -17
  160. teradataml/data/correlation_example.json +11 -11
  161. teradataml/data/coxhazardratio_example.json +39 -39
  162. teradataml/data/coxph_example.json +15 -15
  163. teradataml/data/coxsurvival_example.json +28 -28
  164. teradataml/data/cpt.csv +41 -41
  165. teradataml/data/credit_ex_merged.csv +45 -45
  166. teradataml/data/customer_loyalty.csv +301 -301
  167. teradataml/data/customer_loyalty_newseq.csv +31 -31
  168. teradataml/data/dataframe_example.json +146 -146
  169. teradataml/data/decisionforest_example.json +37 -37
  170. teradataml/data/decisionforestpredict_example.json +38 -38
  171. teradataml/data/decisiontree_example.json +21 -21
  172. teradataml/data/decisiontreepredict_example.json +45 -45
  173. teradataml/data/dfft2_size4_real.csv +17 -17
  174. teradataml/data/dfft2_test_matrix16.csv +17 -17
  175. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  176. teradataml/data/diabetes.csv +443 -443
  177. teradataml/data/diabetes_test.csv +89 -89
  178. teradataml/data/dict_table.csv +5 -5
  179. teradataml/data/docperterm_table.csv +4 -4
  180. teradataml/data/docs/__init__.py +1 -1
  181. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  182. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  183. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  184. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  185. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  186. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  187. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  188. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  189. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  190. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  191. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  192. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  193. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  194. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  195. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  196. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  197. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  198. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  199. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  200. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  201. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  202. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  203. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  204. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  205. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  206. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  207. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  208. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  209. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +132 -132
  210. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +103 -103
  211. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  212. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +101 -101
  213. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  214. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  215. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  216. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  217. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  218. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  219. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  220. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  221. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  222. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  223. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  224. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  225. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  226. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  227. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  228. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  229. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  230. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  231. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  232. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  233. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  234. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +126 -126
  235. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  236. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  237. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  238. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  239. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  240. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  241. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  242. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  243. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +243 -243
  244. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  245. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  246. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  247. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  248. teradataml/data/docs/sqle/docs_17_20/FTest.py +160 -160
  249. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  250. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  251. teradataml/data/docs/sqle/docs_17_20/GLM.py +380 -380
  252. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  253. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  254. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  255. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +123 -123
  256. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  257. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  258. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  259. teradataml/data/docs/sqle/docs_17_20/KMeans.py +204 -204
  260. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  261. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  262. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  263. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  264. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  265. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  266. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  267. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  268. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +117 -117
  269. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  270. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  271. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  272. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  273. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +225 -225
  274. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +115 -115
  275. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  276. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  277. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  278. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  279. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  280. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  281. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  282. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  283. teradataml/data/docs/sqle/docs_17_20/ROC.py +163 -163
  284. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  285. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  286. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  287. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  288. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  289. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  290. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  291. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  292. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +202 -202
  293. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  294. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +197 -197
  295. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +110 -109
  296. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  297. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  298. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  299. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  300. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  301. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  302. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  303. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  304. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +171 -171
  305. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  306. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  307. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  308. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  309. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  310. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  311. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  312. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  313. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  314. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  315. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  316. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +353 -353
  317. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +275 -275
  318. teradataml/data/docs/sqle/docs_17_20/ZTest.py +155 -155
  319. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  320. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  321. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  322. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  323. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  324. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  325. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  326. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  327. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  328. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  329. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  330. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  331. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  332. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  333. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  334. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  335. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  336. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  337. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  338. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  339. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  340. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  341. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  342. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  343. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  344. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  345. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  346. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  347. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  348. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  349. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  350. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  351. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  352. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  353. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  354. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  355. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  356. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  357. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  358. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  359. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  360. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  361. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  362. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  363. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  364. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  365. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  366. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  367. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  368. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  369. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  370. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  371. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  372. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  373. teradataml/data/dtw_example.json +17 -17
  374. teradataml/data/dtw_t1.csv +11 -11
  375. teradataml/data/dtw_t2.csv +4 -4
  376. teradataml/data/dwt2d_example.json +15 -15
  377. teradataml/data/dwt_example.json +14 -14
  378. teradataml/data/dwt_filter_dim.csv +5 -5
  379. teradataml/data/emission.csv +9 -9
  380. teradataml/data/emp_table_by_dept.csv +19 -19
  381. teradataml/data/employee_info.csv +4 -4
  382. teradataml/data/employee_table.csv +6 -6
  383. teradataml/data/excluding_event_table.csv +2 -2
  384. teradataml/data/finance_data.csv +6 -6
  385. teradataml/data/finance_data2.csv +61 -61
  386. teradataml/data/finance_data3.csv +93 -93
  387. teradataml/data/fish.csv +160 -0
  388. teradataml/data/fm_blood2ageandweight.csv +26 -26
  389. teradataml/data/fmeasure_example.json +11 -11
  390. teradataml/data/followers_leaders.csv +10 -10
  391. teradataml/data/fpgrowth_example.json +12 -12
  392. teradataml/data/frequentpaths_example.json +29 -29
  393. teradataml/data/friends.csv +9 -9
  394. teradataml/data/fs_input.csv +33 -33
  395. teradataml/data/fs_input1.csv +33 -33
  396. teradataml/data/genData.csv +513 -513
  397. teradataml/data/geodataframe_example.json +39 -39
  398. teradataml/data/glass_types.csv +215 -0
  399. teradataml/data/glm_admissions_model.csv +12 -12
  400. teradataml/data/glm_example.json +29 -29
  401. teradataml/data/glml1l2_example.json +28 -28
  402. teradataml/data/glml1l2predict_example.json +54 -54
  403. teradataml/data/glmpredict_example.json +54 -54
  404. teradataml/data/gq_t1.csv +21 -21
  405. teradataml/data/hconvolve_complex_right.csv +5 -5
  406. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  407. teradataml/data/histogram_example.json +11 -11
  408. teradataml/data/hmmdecoder_example.json +78 -78
  409. teradataml/data/hmmevaluator_example.json +24 -24
  410. teradataml/data/hmmsupervised_example.json +10 -10
  411. teradataml/data/hmmunsupervised_example.json +7 -7
  412. teradataml/data/house_values.csv +12 -12
  413. teradataml/data/house_values2.csv +13 -13
  414. teradataml/data/housing_cat.csv +7 -7
  415. teradataml/data/housing_data.csv +9 -9
  416. teradataml/data/housing_test.csv +47 -47
  417. teradataml/data/housing_test_binary.csv +47 -47
  418. teradataml/data/housing_train.csv +493 -493
  419. teradataml/data/housing_train_attribute.csv +4 -4
  420. teradataml/data/housing_train_binary.csv +437 -437
  421. teradataml/data/housing_train_parameter.csv +2 -2
  422. teradataml/data/housing_train_response.csv +493 -493
  423. teradataml/data/ibm_stock.csv +370 -370
  424. teradataml/data/ibm_stock1.csv +370 -370
  425. teradataml/data/identitymatch_example.json +21 -21
  426. teradataml/data/idf_table.csv +4 -4
  427. teradataml/data/impressions.csv +101 -101
  428. teradataml/data/inflation.csv +21 -21
  429. teradataml/data/initial.csv +3 -3
  430. teradataml/data/insect_sprays.csv +12 -12
  431. teradataml/data/insurance.csv +1339 -1339
  432. teradataml/data/interpolator_example.json +12 -12
  433. teradataml/data/iris_altinput.csv +481 -481
  434. teradataml/data/iris_attribute_output.csv +8 -8
  435. teradataml/data/iris_attribute_test.csv +121 -121
  436. teradataml/data/iris_attribute_train.csv +481 -481
  437. teradataml/data/iris_category_expect_predict.csv +31 -31
  438. teradataml/data/iris_data.csv +151 -0
  439. teradataml/data/iris_input.csv +151 -151
  440. teradataml/data/iris_response_train.csv +121 -121
  441. teradataml/data/iris_test.csv +31 -31
  442. teradataml/data/iris_train.csv +121 -121
  443. teradataml/data/join_table1.csv +4 -4
  444. teradataml/data/join_table2.csv +4 -4
  445. teradataml/data/jsons/anly_function_name.json +6 -6
  446. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  447. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  448. teradataml/data/jsons/byom/h2opredict.json +194 -194
  449. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  450. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  451. teradataml/data/jsons/paired_functions.json +435 -435
  452. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  453. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  454. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  455. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  456. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  457. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  458. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  459. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  460. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  461. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  462. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  463. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  464. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  465. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  466. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  467. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  468. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  469. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  470. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  471. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  472. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  473. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  474. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  475. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  476. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  477. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  478. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  479. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  480. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  481. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  482. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  483. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  484. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  485. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  486. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  487. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  488. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  489. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  490. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  491. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  492. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  493. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  494. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  495. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  496. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  497. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  498. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  499. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  500. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  501. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  502. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  503. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  504. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  505. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  506. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  507. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  508. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  509. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  510. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  511. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  512. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  513. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  514. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  515. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  516. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  517. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  518. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  519. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  520. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  521. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  522. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  523. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  524. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  525. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  526. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  527. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  528. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  529. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  531. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  532. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  533. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  534. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  535. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  536. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  537. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  539. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  540. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  541. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  542. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  543. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  544. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  545. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  546. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  547. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  548. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  549. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  550. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  551. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  552. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  553. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  554. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  555. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  556. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  557. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  558. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +76 -76
  559. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  560. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  561. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  562. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  563. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  564. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  565. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  566. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  567. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  568. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  569. teradataml/data/jsons/sqle/17.20/TD_FTest.json +186 -186
  570. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  571. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  572. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  573. teradataml/data/jsons/sqle/17.20/TD_GLM.json +431 -431
  574. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +125 -125
  575. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  576. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  577. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +91 -91
  578. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  579. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  580. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  581. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +211 -211
  582. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  583. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  584. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  585. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +101 -101
  586. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  587. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  588. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  589. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  590. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  591. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  592. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  593. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  594. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  595. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  596. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  597. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  598. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  599. teradataml/data/jsons/sqle/17.20/TD_ROC.json +177 -177
  600. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  601. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  602. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  603. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  604. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  605. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  606. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  607. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  608. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +124 -124
  609. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +156 -156
  610. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +70 -70
  611. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  612. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  613. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  614. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  615. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  616. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  617. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  618. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  619. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  620. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  621. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  622. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  623. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  624. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  625. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +312 -312
  626. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +182 -182
  627. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +170 -170
  628. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  629. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  630. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  631. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  632. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  633. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  634. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  635. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  636. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  637. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  638. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  639. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  640. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  641. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  642. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  643. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  644. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  645. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  646. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  647. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  648. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  649. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  650. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  651. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  653. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  654. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  655. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  656. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  657. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  658. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  659. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  660. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  661. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  662. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  663. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  664. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  665. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  666. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  667. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  668. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  669. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  670. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  671. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  672. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  673. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  674. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  675. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  676. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  677. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  678. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  679. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  680. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  681. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  682. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  683. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  684. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  685. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  686. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  687. teradataml/data/kmeans_example.json +17 -17
  688. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  689. teradataml/data/knn_example.json +18 -18
  690. teradataml/data/knnrecommender_example.json +6 -6
  691. teradataml/data/knnrecommenderpredict_example.json +12 -12
  692. teradataml/data/lar_example.json +17 -17
  693. teradataml/data/larpredict_example.json +30 -30
  694. teradataml/data/lc_new_predictors.csv +5 -5
  695. teradataml/data/lc_new_reference.csv +9 -9
  696. teradataml/data/lda_example.json +8 -8
  697. teradataml/data/ldainference_example.json +14 -14
  698. teradataml/data/ldatopicsummary_example.json +8 -8
  699. teradataml/data/levendist_input.csv +13 -13
  700. teradataml/data/levenshteindistance_example.json +10 -10
  701. teradataml/data/linreg_example.json +9 -9
  702. teradataml/data/load_example_data.py +326 -323
  703. teradataml/data/loan_prediction.csv +295 -295
  704. teradataml/data/lungcancer.csv +138 -138
  705. teradataml/data/mappingdata.csv +12 -12
  706. teradataml/data/milk_timeseries.csv +157 -157
  707. teradataml/data/min_max_titanic.csv +4 -4
  708. teradataml/data/minhash_example.json +6 -6
  709. teradataml/data/ml_ratings.csv +7547 -7547
  710. teradataml/data/ml_ratings_10.csv +2445 -2445
  711. teradataml/data/model1_table.csv +5 -5
  712. teradataml/data/model2_table.csv +5 -5
  713. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  714. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  715. teradataml/data/modularity_example.json +12 -12
  716. teradataml/data/movavg_example.json +7 -7
  717. teradataml/data/mtx1.csv +7 -7
  718. teradataml/data/mtx2.csv +13 -13
  719. teradataml/data/multi_model_classification.csv +401 -0
  720. teradataml/data/multi_model_regression.csv +401 -0
  721. teradataml/data/mvdfft8.csv +9 -9
  722. teradataml/data/naivebayes_example.json +9 -9
  723. teradataml/data/naivebayespredict_example.json +19 -19
  724. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  725. teradataml/data/naivebayestextclassifier_example.json +8 -8
  726. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  727. teradataml/data/name_Find_configure.csv +10 -10
  728. teradataml/data/namedentityfinder_example.json +14 -14
  729. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  730. teradataml/data/namedentityfindertrainer_example.json +6 -6
  731. teradataml/data/nb_iris_input_test.csv +31 -31
  732. teradataml/data/nb_iris_input_train.csv +121 -121
  733. teradataml/data/nbp_iris_model.csv +13 -13
  734. teradataml/data/ner_extractor_text.csv +2 -2
  735. teradataml/data/ner_sports_test2.csv +29 -29
  736. teradataml/data/ner_sports_train.csv +501 -501
  737. teradataml/data/nerevaluator_example.json +5 -5
  738. teradataml/data/nerextractor_example.json +18 -18
  739. teradataml/data/nermem_sports_test.csv +17 -17
  740. teradataml/data/nermem_sports_train.csv +50 -50
  741. teradataml/data/nertrainer_example.json +6 -6
  742. teradataml/data/ngrams_example.json +6 -6
  743. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  744. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  745. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  746. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  747. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  748. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  749. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  750. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  751. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  752. teradataml/data/npath_example.json +23 -23
  753. teradataml/data/ntree_example.json +14 -14
  754. teradataml/data/numeric_strings.csv +4 -4
  755. teradataml/data/numerics.csv +4 -4
  756. teradataml/data/ocean_buoy.csv +17 -17
  757. teradataml/data/ocean_buoy2.csv +17 -17
  758. teradataml/data/ocean_buoys.csv +27 -27
  759. teradataml/data/ocean_buoys2.csv +10 -10
  760. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  761. teradataml/data/ocean_buoys_seq.csv +29 -29
  762. teradataml/data/openml_example.json +63 -0
  763. teradataml/data/optional_event_table.csv +4 -4
  764. teradataml/data/orders1.csv +11 -11
  765. teradataml/data/orders1_12.csv +12 -12
  766. teradataml/data/orders_ex.csv +4 -4
  767. teradataml/data/pack_example.json +8 -8
  768. teradataml/data/package_tracking.csv +19 -19
  769. teradataml/data/package_tracking_pti.csv +18 -18
  770. teradataml/data/pagerank_example.json +13 -13
  771. teradataml/data/paragraphs_input.csv +6 -6
  772. teradataml/data/pathanalyzer_example.json +7 -7
  773. teradataml/data/pathgenerator_example.json +7 -7
  774. teradataml/data/phrases.csv +7 -7
  775. teradataml/data/pivot_example.json +8 -8
  776. teradataml/data/pivot_input.csv +22 -22
  777. teradataml/data/playerRating.csv +31 -31
  778. teradataml/data/postagger_example.json +6 -6
  779. teradataml/data/posttagger_output.csv +44 -44
  780. teradataml/data/production_data.csv +16 -16
  781. teradataml/data/production_data2.csv +7 -7
  782. teradataml/data/randomsample_example.json +31 -31
  783. teradataml/data/randomwalksample_example.json +8 -8
  784. teradataml/data/rank_table.csv +6 -6
  785. teradataml/data/ref_mobile_data.csv +4 -4
  786. teradataml/data/ref_mobile_data_dense.csv +2 -2
  787. teradataml/data/ref_url.csv +17 -17
  788. teradataml/data/restaurant_reviews.csv +7 -7
  789. teradataml/data/river_data.csv +145 -145
  790. teradataml/data/roc_example.json +7 -7
  791. teradataml/data/roc_input.csv +101 -101
  792. teradataml/data/rule_inputs.csv +6 -6
  793. teradataml/data/rule_table.csv +2 -2
  794. teradataml/data/sales.csv +7 -7
  795. teradataml/data/sales_transaction.csv +501 -501
  796. teradataml/data/salesdata.csv +342 -342
  797. teradataml/data/sample_cities.csv +2 -2
  798. teradataml/data/sample_shapes.csv +10 -10
  799. teradataml/data/sample_streets.csv +2 -2
  800. teradataml/data/sampling_example.json +15 -15
  801. teradataml/data/sax_example.json +8 -8
  802. teradataml/data/scale_example.json +23 -23
  803. teradataml/data/scale_housing.csv +11 -11
  804. teradataml/data/scale_housing_test.csv +6 -6
  805. teradataml/data/scale_stat.csv +11 -11
  806. teradataml/data/scalebypartition_example.json +13 -13
  807. teradataml/data/scalemap_example.json +13 -13
  808. teradataml/data/scalesummary_example.json +12 -12
  809. teradataml/data/score_category.csv +101 -101
  810. teradataml/data/score_summary.csv +4 -4
  811. teradataml/data/script_example.json +9 -9
  812. teradataml/data/scripts/deploy_script.py +65 -0
  813. teradataml/data/scripts/mapper.R +20 -0
  814. teradataml/data/scripts/mapper.py +15 -15
  815. teradataml/data/scripts/mapper_replace.py +15 -15
  816. teradataml/data/scripts/sklearn/__init__.py +0 -0
  817. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  818. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  819. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  820. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  821. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  822. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  823. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  824. teradataml/data/seeds.csv +10 -10
  825. teradataml/data/sentenceextractor_example.json +6 -6
  826. teradataml/data/sentiment_extract_input.csv +11 -11
  827. teradataml/data/sentiment_train.csv +16 -16
  828. teradataml/data/sentiment_word.csv +20 -20
  829. teradataml/data/sentiment_word_input.csv +19 -19
  830. teradataml/data/sentimentextractor_example.json +24 -24
  831. teradataml/data/sentimenttrainer_example.json +8 -8
  832. teradataml/data/sequence_table.csv +10 -10
  833. teradataml/data/seriessplitter_example.json +7 -7
  834. teradataml/data/sessionize_example.json +17 -17
  835. teradataml/data/sessionize_table.csv +116 -116
  836. teradataml/data/setop_test1.csv +24 -24
  837. teradataml/data/setop_test2.csv +22 -22
  838. teradataml/data/soc_nw_edges.csv +10 -10
  839. teradataml/data/soc_nw_vertices.csv +7 -7
  840. teradataml/data/souvenir_timeseries.csv +167 -167
  841. teradataml/data/sparse_iris_attribute.csv +5 -5
  842. teradataml/data/sparse_iris_test.csv +121 -121
  843. teradataml/data/sparse_iris_train.csv +601 -601
  844. teradataml/data/star1.csv +6 -6
  845. teradataml/data/state_transition.csv +5 -5
  846. teradataml/data/stock_data.csv +53 -53
  847. teradataml/data/stock_movement.csv +11 -11
  848. teradataml/data/stock_vol.csv +76 -76
  849. teradataml/data/stop_words.csv +8 -8
  850. teradataml/data/store_sales.csv +37 -37
  851. teradataml/data/stringsimilarity_example.json +7 -7
  852. teradataml/data/strsimilarity_input.csv +13 -13
  853. teradataml/data/students.csv +101 -101
  854. teradataml/data/svm_iris_input_test.csv +121 -121
  855. teradataml/data/svm_iris_input_train.csv +481 -481
  856. teradataml/data/svm_iris_model.csv +7 -7
  857. teradataml/data/svmdense_example.json +9 -9
  858. teradataml/data/svmdensepredict_example.json +18 -18
  859. teradataml/data/svmsparse_example.json +7 -7
  860. teradataml/data/svmsparsepredict_example.json +13 -13
  861. teradataml/data/svmsparsesummary_example.json +7 -7
  862. teradataml/data/target_mobile_data.csv +13 -13
  863. teradataml/data/target_mobile_data_dense.csv +5 -5
  864. teradataml/data/templatedata.csv +1201 -1201
  865. teradataml/data/templates/open_source_ml.json +9 -0
  866. teradataml/data/teradataml_example.json +73 -1
  867. teradataml/data/test_classification.csv +101 -0
  868. teradataml/data/test_loan_prediction.csv +53 -53
  869. teradataml/data/test_pacf_12.csv +37 -37
  870. teradataml/data/test_prediction.csv +101 -0
  871. teradataml/data/test_regression.csv +101 -0
  872. teradataml/data/test_river2.csv +109 -109
  873. teradataml/data/text_inputs.csv +6 -6
  874. teradataml/data/textchunker_example.json +7 -7
  875. teradataml/data/textclassifier_example.json +6 -6
  876. teradataml/data/textclassifier_input.csv +7 -7
  877. teradataml/data/textclassifiertrainer_example.json +6 -6
  878. teradataml/data/textmorph_example.json +5 -5
  879. teradataml/data/textparser_example.json +15 -15
  880. teradataml/data/texttagger_example.json +11 -11
  881. teradataml/data/texttokenizer_example.json +6 -6
  882. teradataml/data/texttrainer_input.csv +11 -11
  883. teradataml/data/tf_example.json +6 -6
  884. teradataml/data/tfidf_example.json +13 -13
  885. teradataml/data/tfidf_input1.csv +201 -201
  886. teradataml/data/tfidf_train.csv +6 -6
  887. teradataml/data/time_table1.csv +535 -535
  888. teradataml/data/time_table2.csv +14 -14
  889. teradataml/data/timeseriesdata.csv +1601 -1601
  890. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  891. teradataml/data/titanic.csv +892 -892
  892. teradataml/data/token_table.csv +696 -696
  893. teradataml/data/train_multiclass.csv +101 -0
  894. teradataml/data/train_regression.csv +101 -0
  895. teradataml/data/train_regression_multiple_labels.csv +101 -0
  896. teradataml/data/train_tracking.csv +27 -27
  897. teradataml/data/transformation_table.csv +5 -5
  898. teradataml/data/transformation_table_new.csv +1 -1
  899. teradataml/data/tv_spots.csv +16 -16
  900. teradataml/data/twod_climate_data.csv +117 -117
  901. teradataml/data/uaf_example.json +475 -475
  902. teradataml/data/univariatestatistics_example.json +8 -8
  903. teradataml/data/unpack_example.json +9 -9
  904. teradataml/data/unpivot_example.json +9 -9
  905. teradataml/data/unpivot_input.csv +8 -8
  906. teradataml/data/us_air_pass.csv +36 -36
  907. teradataml/data/us_population.csv +624 -624
  908. teradataml/data/us_states_shapes.csv +52 -52
  909. teradataml/data/varmax_example.json +17 -17
  910. teradataml/data/vectordistance_example.json +25 -25
  911. teradataml/data/ville_climatedata.csv +121 -121
  912. teradataml/data/ville_tempdata.csv +12 -12
  913. teradataml/data/ville_tempdata1.csv +12 -12
  914. teradataml/data/ville_temperature.csv +11 -11
  915. teradataml/data/waveletTable.csv +1605 -1605
  916. teradataml/data/waveletTable2.csv +1605 -1605
  917. teradataml/data/weightedmovavg_example.json +8 -8
  918. teradataml/data/wft_testing.csv +5 -5
  919. teradataml/data/wine_data.csv +1600 -0
  920. teradataml/data/word_embed_input_table1.csv +5 -5
  921. teradataml/data/word_embed_input_table2.csv +4 -4
  922. teradataml/data/word_embed_model.csv +22 -22
  923. teradataml/data/words_input.csv +13 -13
  924. teradataml/data/xconvolve_complex_left.csv +6 -6
  925. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  926. teradataml/data/xgboost_example.json +35 -35
  927. teradataml/data/xgboostpredict_example.json +31 -31
  928. teradataml/dataframe/copy_to.py +1764 -1698
  929. teradataml/dataframe/data_transfer.py +2753 -2745
  930. teradataml/dataframe/dataframe.py +17545 -16946
  931. teradataml/dataframe/dataframe_utils.py +1837 -1740
  932. teradataml/dataframe/fastload.py +611 -603
  933. teradataml/dataframe/indexer.py +424 -424
  934. teradataml/dataframe/setop.py +1179 -1166
  935. teradataml/dataframe/sql.py +10090 -6432
  936. teradataml/dataframe/sql_function_parameters.py +439 -388
  937. teradataml/dataframe/sql_functions.py +652 -652
  938. teradataml/dataframe/sql_interfaces.py +220 -220
  939. teradataml/dataframe/vantage_function_types.py +674 -630
  940. teradataml/dataframe/window.py +693 -692
  941. teradataml/dbutils/__init__.py +3 -3
  942. teradataml/dbutils/dbutils.py +1167 -1150
  943. teradataml/dbutils/filemgr.py +267 -267
  944. teradataml/gen_ai/__init__.py +2 -2
  945. teradataml/gen_ai/convAI.py +472 -472
  946. teradataml/geospatial/__init__.py +3 -3
  947. teradataml/geospatial/geodataframe.py +1105 -1094
  948. teradataml/geospatial/geodataframecolumn.py +392 -387
  949. teradataml/geospatial/geometry_types.py +925 -925
  950. teradataml/hyperparameter_tuner/__init__.py +1 -1
  951. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  952. teradataml/hyperparameter_tuner/utils.py +281 -187
  953. teradataml/lib/aed_0_1.dll +0 -0
  954. teradataml/lib/libaed_0_1.dylib +0 -0
  955. teradataml/lib/libaed_0_1.so +0 -0
  956. teradataml/libaed_0_1.dylib +0 -0
  957. teradataml/libaed_0_1.so +0 -0
  958. teradataml/opensource/__init__.py +1 -0
  959. teradataml/opensource/sklearn/__init__.py +1 -0
  960. teradataml/opensource/sklearn/_class.py +255 -0
  961. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  962. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  963. teradataml/opensource/sklearn/constants.py +54 -0
  964. teradataml/options/__init__.py +121 -124
  965. teradataml/options/configure.py +337 -336
  966. teradataml/options/display.py +176 -176
  967. teradataml/plot/__init__.py +2 -2
  968. teradataml/plot/axis.py +1388 -1388
  969. teradataml/plot/constants.py +15 -15
  970. teradataml/plot/figure.py +398 -398
  971. teradataml/plot/plot.py +760 -760
  972. teradataml/plot/query_generator.py +83 -83
  973. teradataml/plot/subplot.py +216 -216
  974. teradataml/scriptmgmt/UserEnv.py +3788 -3761
  975. teradataml/scriptmgmt/__init__.py +3 -3
  976. teradataml/scriptmgmt/lls_utils.py +1616 -1604
  977. teradataml/series/series.py +532 -532
  978. teradataml/series/series_utils.py +71 -71
  979. teradataml/table_operators/Apply.py +949 -917
  980. teradataml/table_operators/Script.py +1719 -1982
  981. teradataml/table_operators/TableOperator.py +1207 -1616
  982. teradataml/table_operators/__init__.py +2 -3
  983. teradataml/table_operators/apply_query_generator.py +262 -262
  984. teradataml/table_operators/query_generator.py +507 -507
  985. teradataml/table_operators/table_operator_query_generator.py +460 -460
  986. teradataml/table_operators/table_operator_util.py +631 -639
  987. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  988. teradataml/table_operators/templates/dataframe_map.template +176 -176
  989. teradataml/table_operators/templates/script_executor.template +170 -170
  990. teradataml/utils/dtypes.py +684 -684
  991. teradataml/utils/internal_buffer.py +84 -84
  992. teradataml/utils/print_versions.py +205 -205
  993. teradataml/utils/utils.py +410 -410
  994. teradataml/utils/validators.py +2239 -2115
  995. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +270 -41
  996. teradataml-20.0.0.0.dist-info/RECORD +1038 -0
  997. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +1 -1
  998. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +1 -1
  999. teradataml/analytics/mle/AdaBoost.py +0 -651
  1000. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1001. teradataml/analytics/mle/Antiselect.py +0 -342
  1002. teradataml/analytics/mle/Arima.py +0 -641
  1003. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1004. teradataml/analytics/mle/Attribution.py +0 -1070
  1005. teradataml/analytics/mle/Betweenness.py +0 -658
  1006. teradataml/analytics/mle/Burst.py +0 -711
  1007. teradataml/analytics/mle/CCM.py +0 -600
  1008. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1009. teradataml/analytics/mle/CFilter.py +0 -460
  1010. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1011. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1012. teradataml/analytics/mle/Closeness.py +0 -737
  1013. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1014. teradataml/analytics/mle/Correlation.py +0 -477
  1015. teradataml/analytics/mle/Correlation2.py +0 -573
  1016. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1017. teradataml/analytics/mle/CoxPH.py +0 -556
  1018. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1019. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1020. teradataml/analytics/mle/DTW.py +0 -623
  1021. teradataml/analytics/mle/DWT.py +0 -564
  1022. teradataml/analytics/mle/DWT2D.py +0 -599
  1023. teradataml/analytics/mle/DecisionForest.py +0 -716
  1024. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1025. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1026. teradataml/analytics/mle/DecisionTree.py +0 -830
  1027. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1028. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1029. teradataml/analytics/mle/FMeasure.py +0 -402
  1030. teradataml/analytics/mle/FPGrowth.py +0 -734
  1031. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1032. teradataml/analytics/mle/GLM.py +0 -558
  1033. teradataml/analytics/mle/GLML1L2.py +0 -547
  1034. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1035. teradataml/analytics/mle/GLMPredict.py +0 -529
  1036. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1037. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1038. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1039. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1040. teradataml/analytics/mle/Histogram.py +0 -561
  1041. teradataml/analytics/mle/IDWT.py +0 -476
  1042. teradataml/analytics/mle/IDWT2D.py +0 -493
  1043. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1044. teradataml/analytics/mle/Interpolator.py +0 -918
  1045. teradataml/analytics/mle/KMeans.py +0 -485
  1046. teradataml/analytics/mle/KNN.py +0 -627
  1047. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1048. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1049. teradataml/analytics/mle/LAR.py +0 -439
  1050. teradataml/analytics/mle/LARPredict.py +0 -478
  1051. teradataml/analytics/mle/LDA.py +0 -548
  1052. teradataml/analytics/mle/LDAInference.py +0 -492
  1053. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1054. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1055. teradataml/analytics/mle/LinReg.py +0 -433
  1056. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1057. teradataml/analytics/mle/MinHash.py +0 -544
  1058. teradataml/analytics/mle/Modularity.py +0 -587
  1059. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1060. teradataml/analytics/mle/NERExtractor.py +0 -595
  1061. teradataml/analytics/mle/NERTrainer.py +0 -458
  1062. teradataml/analytics/mle/NGrams.py +0 -570
  1063. teradataml/analytics/mle/NPath.py +0 -634
  1064. teradataml/analytics/mle/NTree.py +0 -549
  1065. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1066. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1067. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1068. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1069. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1070. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1071. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1072. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1073. teradataml/analytics/mle/POSTagger.py +0 -417
  1074. teradataml/analytics/mle/Pack.py +0 -411
  1075. teradataml/analytics/mle/PageRank.py +0 -535
  1076. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1077. teradataml/analytics/mle/PathGenerator.py +0 -367
  1078. teradataml/analytics/mle/PathStart.py +0 -464
  1079. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1080. teradataml/analytics/mle/Pivot.py +0 -471
  1081. teradataml/analytics/mle/ROC.py +0 -425
  1082. teradataml/analytics/mle/RandomSample.py +0 -637
  1083. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1084. teradataml/analytics/mle/SAX.py +0 -779
  1085. teradataml/analytics/mle/SVMDense.py +0 -677
  1086. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1087. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1088. teradataml/analytics/mle/SVMSparse.py +0 -557
  1089. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1090. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1091. teradataml/analytics/mle/Sampling.py +0 -549
  1092. teradataml/analytics/mle/Scale.py +0 -565
  1093. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1094. teradataml/analytics/mle/ScaleMap.py +0 -378
  1095. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1096. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1097. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1098. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1099. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1100. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1101. teradataml/analytics/mle/Sessionize.py +0 -475
  1102. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1103. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1104. teradataml/analytics/mle/TF.py +0 -389
  1105. teradataml/analytics/mle/TFIDF.py +0 -504
  1106. teradataml/analytics/mle/TextChunker.py +0 -414
  1107. teradataml/analytics/mle/TextClassifier.py +0 -399
  1108. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1109. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1110. teradataml/analytics/mle/TextMorph.py +0 -494
  1111. teradataml/analytics/mle/TextParser.py +0 -623
  1112. teradataml/analytics/mle/TextTagger.py +0 -530
  1113. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1114. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1115. teradataml/analytics/mle/Unpack.py +0 -526
  1116. teradataml/analytics/mle/Unpivot.py +0 -438
  1117. teradataml/analytics/mle/VarMax.py +0 -776
  1118. teradataml/analytics/mle/VectorDistance.py +0 -762
  1119. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1120. teradataml/analytics/mle/XGBoost.py +0 -842
  1121. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1122. teradataml/analytics/mle/__init__.py +0 -123
  1123. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1124. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1125. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1126. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1127. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1128. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1129. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1130. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1131. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1132. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1133. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1134. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1135. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1136. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1137. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1138. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1139. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1140. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1141. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1142. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1143. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1144. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1145. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1146. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1147. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1148. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1149. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1150. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1151. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1152. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1153. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1154. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1155. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1156. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1157. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1158. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1159. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1160. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1161. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1162. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1163. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1164. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1165. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1166. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1167. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1168. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1169. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1170. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1171. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1172. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1173. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1174. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1175. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1176. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1177. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1178. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1179. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1180. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1181. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1182. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1183. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1184. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1185. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1186. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1187. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1188. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1189. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1190. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1191. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1192. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1193. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1194. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1195. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1196. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1197. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1198. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1199. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1200. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1201. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1202. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1203. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1204. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1205. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1206. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1207. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1208. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1209. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1210. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1211. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1212. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1213. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1214. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1215. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1216. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1217. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1218. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1219. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1220. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1221. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1222. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1223. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1224. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1225. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1226. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1227. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1228. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1229. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1230. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1231. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1232. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1233. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1234. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1235. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1236. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1237. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1238. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1239. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1240. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1241. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1242. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1243. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1244. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1245. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1246. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1247. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1248. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1249. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1250. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1251. teradataml/analytics/sqle/Antiselect.py +0 -321
  1252. teradataml/analytics/sqle/Attribution.py +0 -603
  1253. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1254. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1255. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1256. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1257. teradataml/analytics/sqle/NPath.py +0 -632
  1258. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1259. teradataml/analytics/sqle/Pack.py +0 -388
  1260. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1261. teradataml/analytics/sqle/Sessionize.py +0 -390
  1262. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1263. teradataml/analytics/sqle/Unpack.py +0 -503
  1264. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1265. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1266. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1267. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1268. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1269. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1270. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1271. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1272. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1273. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1274. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1275. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1276. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1277. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1278. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1279. teradataml/catalog/model_cataloging.py +0 -980
  1280. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1281. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1282. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1283. teradataml/table_operators/sandbox_container_util.py +0 -643
  1284. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1285. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
teradataml/dataframe/copy_to.py
@@ -1,1698 +1,1764 @@
- #!/usr/bin/python
- # ##################################################################
- #
- # Copyright 2018 Teradata. All rights reserved.
- # TERADATA CONFIDENTIAL AND TRADE SECRET
- #
- # ##################################################################
-
- import re
- import datetime
- import warnings
- import pandas as pd
- import pandas.api.types as pt
-
- from sqlalchemy import MetaData, Table, Column
- from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
- from teradatasqlalchemy import (INTEGER, BIGINT, BYTEINT, FLOAT)
- from teradatasqlalchemy import (TIMESTAMP)
- from teradatasqlalchemy import (VARCHAR)
- from teradatasqlalchemy.dialect import TDCreateTablePost as post
- from teradataml.common.aed_utils import AedUtils
- from teradataml.context.context import *
- from teradataml.dataframe import dataframe as tdmldf
- from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
- from teradataml.dbutils.dbutils import _rename_table
- from teradataml.common.utils import UtilFuncs
- from teradataml.options.configure import configure
- from teradataml.common.constants import CopyToConstants, PTITableConstants
- from teradatasql import OperationalError
- from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
- from teradataml.utils.utils import execute_sql
- from teradataml.utils.validators import _Validators
-
-
-
- def copy_to_sql(df, table_name,
-                 schema_name=None, if_exists='append',
-                 index=False, index_label=None,
-                 primary_index=None,
-                 temporary=False, types=None,
-                 primary_time_index_name=None,
-                 timecode_column=None,
-                 timebucket_duration=None,
-                 timezero_date=None,
-                 columns_list=None,
-                 sequence_column=None,
-                 seq_max=None,
-                 set_table=False,
-                 chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
-                 match_column_order=True):
-     """
-     Writes records stored in a Pandas DataFrame or a teradataml DataFrame to Teradata Vantage.
-
-     PARAMETERS:
-
-         df:
-             Required Argument.
-             Specifies the Pandas or teradataml DataFrame object to be saved.
-             Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
-
-         table_name:
-             Required Argument.
-             Specifies the name of the table to be created in Vantage.
-             Types: String
-
-         schema_name:
-             Optional Argument.
-             Specifies the name of the SQL schema in Teradata Vantage to write to.
-             Types: String
-             Default: None (Uses default database schema).
-
-             Note: schema_name will be ignored when temporary=True.
-
-         if_exists:
-             Optional Argument.
-             Specifies the action to take when the table already exists in Vantage.
-             Types: String
-             Possible values: {'fail', 'replace', 'append'}
-                 - fail: If table exists, do nothing.
-                 - replace: If table exists, drop it, recreate it, and insert data.
-                 - append: If table exists, insert data. Create the table if it does not exist.
-             Default: append
-
-             Note: Replacing a table with the contents of a teradataml DataFrame based on
-                   the same underlying table is not supported.
-
-         index:
-             Optional Argument.
-             Specifies whether to save the Pandas DataFrame index as a column or not.
-             Types: Boolean (True or False)
-             Default: False
-
-             Note: Use True only when saving Pandas DataFrames (and not teradataml DataFrames).
-
-         index_label:
-             Optional Argument.
-             Specifies the column label(s) for the Pandas DataFrame index column(s).
-             Types: String or list of strings
-             Default: None
-
-             Note: If index_label is not specified (defaulted to None or is empty) and `index` is True, then
-                   the 'names' property of the DataFrame's index is used as the label(s),
-                   and if that too is None or empty, then:
-                   1) a default label 'index_label' or 'level_0' (when 'index_label' is already taken) is used
-                      when the index is standard.
-                   2) default labels 'level_0', 'level_1', etc. are used when the index is a multi-level index.
-
-                   Only used when saving Pandas DataFrames (and not teradataml DataFrames).
-
-         primary_index:
-             Optional Argument.
-             Specifies which column(s) to use as primary index while creating Teradata table(s) in Vantage.
-             When None, No Primary Index Teradata tables are created.
-             Types: String or list of strings
-             Default: None
-             Example:
-                 primary_index = 'my_primary_index'
-                 primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
-
-         temporary:
-             Optional Argument.
-             Specifies whether to create Vantage tables as permanent or volatile.
-             Types: Boolean (True or False)
-             Default: False
-
-             Note: When True:
-                   1. volatile tables are created, and
-                   2. schema_name is ignored.
-                   When False, permanent tables are created.
-
-         types:
-             Optional Argument.
-             Specifies the required data types for requested columns to be saved in Vantage.
-             Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
-             Default: None
-
-             Note:
-                 1. This argument accepts a dictionary of column names and their required teradatasqlalchemy types
-                    as key-value pairs, allowing you to specify a subset of the columns of a specific type.
-                    i) When the input is a Pandas DataFrame:
-                       - When only a subset of all columns is provided, the column types for the rest are assigned
-                         appropriately.
-                       - When the types argument is not provided, the column types are assigned
-                         as listed in the following table:
-                         +---------------------------+------------------------------------------+
-                         | Pandas/Numpy Type         | teradatasqlalchemy Type                  |
-                         +---------------------------+------------------------------------------+
-                         | int32                     | INTEGER                                  |
-                         +---------------------------+------------------------------------------+
-                         | int64                     | BIGINT                                   |
-                         +---------------------------+------------------------------------------+
-                         | bool                      | BYTEINT                                  |
-                         +---------------------------+------------------------------------------+
-                         | float32/float64           | FLOAT                                    |
-                         +---------------------------+------------------------------------------+
-                         | datetime64/datetime64[ns] | TIMESTAMP                                |
-                         +---------------------------+------------------------------------------+
-                         | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True)                 |
-                         +---------------------------+------------------------------------------+
-                         | Any other data type       | VARCHAR(configure.default_varchar_size) |
-                         +---------------------------+------------------------------------------+
-                    ii) When the input is a teradataml DataFrame:
-                        - When only a subset of all columns is provided, the column types for the rest are retained.
-                        - When the types argument is not provided, the column types are retained.
-                 2. This argument does not have any effect when the table specified using table_name and schema_name
-                    exists and if_exists = 'append'.
-
-         primary_time_index_name:
-             Optional Argument.
-             Specifies a name for the Primary Time Index (PTI) when the table
-             to be created must be a PTI table.
-             Type: String
-
-             Note: This argument is not required or used when the table to be created
-                   is not a PTI table. It will be ignored if specified without the timecode_column.
-
-         timecode_column:
-             Optional Argument.
-             Required when the DataFrame must be saved as a PTI table.
-             Specifies the column in the DataFrame that reflects the form
-             of the timestamp data in the time series.
-             This column will be the TD_TIMECODE column in the table created.
-             It should be of SQL type TIMESTAMP(n), TIMESTAMP(n) WITH TIMEZONE, or DATE,
-             corresponding to Python types datetime.datetime or datetime.date, or Pandas dtype datetime64[ns].
-             Type: String
-
-             Note: When you specify this parameter, an attempt to create a PTI table
-                   will be made. This argument is not required when the table to be created
-                   is not a PTI table. If this argument is specified, primary_index will be ignored.
-
-         timezero_date:
-             Optional Argument.
-             Used when the DataFrame must be saved as a PTI table.
-             Specifies the earliest time series data that the PTI table will accept;
-             a date that precedes the earliest date in the time series data.
-             Value specified must be of the following format: DATE 'YYYY-MM-DD'
-             Default Value: DATE '1970-01-01'.
-             Type: String
-
-             Note: This argument is not required or used when the table to be created
-                   is not a PTI table. It will be ignored if specified without the timecode_column.
-
-         timebucket_duration:
-             Optional Argument.
-             Required if columns_list is not specified or is None.
-             Used when the DataFrame must be saved as a PTI table.
-             Specifies a duration that serves to break up the time continuum in
-             the time series data into discrete groups or buckets.
-             Specified using the formal form time_unit(n), where n is a positive
-             integer, and time_unit can be any of the following:
-             CAL_YEARS, CAL_MONTHS, CAL_DAYS, WEEKS, DAYS, HOURS, MINUTES,
-             SECONDS, MILLISECONDS, or MICROSECONDS.
-             Type: String
-
-             Note: This argument is not required or used when the table to be created
-                   is not a PTI table. It will be ignored if specified without the timecode_column.
-
-         columns_list:
-             Optional Argument.
-             Used when the DataFrame must be saved as a PTI table.
-             Required if timebucket_duration is not specified.
-             A list of one or more PTI table column names.
-             Type: String or list of Strings
-
-             Note: This argument is not required or used when the table to be created
-                   is not a PTI table. It will be ignored if specified without the timecode_column.
-
-         sequence_column:
-             Optional Argument.
-             Used when the DataFrame must be saved as a PTI table.
-             Specifies the column of type Integer containing the unique identifier for
-             time series data readings when they are not unique in time.
-             * When specified, implies SEQUENCED, meaning more than one reading from the same
-               sensor may have the same timestamp.
-               This column will be the TD_SEQNO column in the table created.
-             * When not specified, implies NONSEQUENCED, meaning there is only one sensor reading
-               per timestamp.
-               This is the default.
-             Type: str
-
-             Note: This argument is not required or used when the table to be created
-                   is not a PTI table. It will be ignored if specified without the timecode_column.
-
-         seq_max:
-             Optional Argument.
-             Used when the DataFrame must be saved as a PTI table.
-             Specifies the maximum number of sensor data rows that can have the
-             same timestamp. Can be used when sequence_column is specified.
-             Accepted range: 1 - 2147483647.
-             Default Value: 20000.
-             Type: int
-
-             Note: This argument is not required or used when the table to be created
-                   is not a PTI table. It will be ignored if specified without the timecode_column.
-
-         set_table:
-             Optional Argument.
-             Specifies a flag to determine whether to create a SET or a MULTISET table.
-             When True, a SET table is created.
-             When False, a MULTISET table is created.
-             Default Value: False
-             Type: boolean
-
-             Note: 1. Specifying set_table=True also requires specifying primary_index or timecode_column.
-                   2. Creating a SET table (set_table=True) may result in
-                      a. an error if the source is a Pandas DataFrame having duplicate rows.
-                      b. loss of duplicate rows if the source is a teradataml DataFrame.
-                   3. This argument has no effect if the table already exists and if_exists='append'.
-
-         chunksize:
-             Optional Argument.
-             Specifies the number of rows to be loaded in a batch.
-             Note:
-                 This argument is used only when argument "df" is a pandas DataFrame.
-             Default Value: 16383
-             Types: int
-
-         match_column_order:
-             Optional Argument.
-             Specifies whether the order of the columns in the existing table matches the order of
-             the columns in "df". When set to False, the DataFrame to be loaded can
-             have any order and number of columns.
-             Default Value: True
-             Types: bool
-
-     RETURNS:
-         None
-
-     RAISES:
-         TeradataMlException
-
-     EXAMPLES:
-         1. Saving a Pandas DataFrame:
-
-            >>> from teradataml.dataframe.copy_to import copy_to_sql
-            >>> from teradatasqlalchemy.types import *
-
-            >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
-                      'emp_sage': [100, 200, 300, 400],
-                      'emp_id': [133, 144, 155, 177],
-                      'marks': [99.99, 97.32, 94.67, 91.00]
-                      }
-
-            >>> pandas_df = pd.DataFrame(df)
-
-            a) Save a Pandas DataFrame using a dataframe & table name only:
-               >>> copy_to_sql(df = pandas_df, table_name = 'my_table')
-
-            b) Saving as a SET table:
-               >>> copy_to_sql(df = pandas_df, table_name = 'my_set_table', index=True,
-                               primary_index='index_label', set_table=True)
-
-            c) Save a Pandas DataFrame by specifying additional parameters:
-               >>> copy_to_sql(df = pandas_df, table_name = 'my_table_2', schema_name = 'alice',
-                               index = True, index_label = 'my_index_label', temporary = False,
-                               primary_index = ['emp_id'], if_exists = 'append',
-                               types = {'emp_name': VARCHAR, 'emp_sage': INTEGER,
-                                        'emp_id': BIGINT, 'marks': DECIMAL})
-
-            d) Saving with additional parameters as a SET table:
-               >>> copy_to_sql(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
-                               index = True, index_label = 'my_index_label', temporary = False,
-                               primary_index = ['emp_id'], if_exists = 'append',
-                               types = {'emp_name': VARCHAR, 'emp_sage': INTEGER,
-                                        'emp_id': BIGINT, 'marks': DECIMAL},
-                               set_table=True)
-
-            e) Saving levels in an index of type MultiIndex:
-               >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
-               >>> copy_to_sql(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
-                               index = True, index_label = ['index1', 'index2'], temporary = False,
-                               primary_index = ['index1'], if_exists = 'replace')
-
-         2. Saving a teradataml DataFrame:
-
-            >>> from teradataml.dataframe.dataframe import DataFrame
-            >>> from teradataml.dataframe.copy_to import copy_to_sql
-            >>> from teradatasqlalchemy.types import *
-            >>> from teradataml.data.load_example_data import load_example_data
-
-            >>> # Load the data to run the example.
-            >>> load_example_data("glm", "admissions_train")
-
-            >>> # Create teradataml DataFrame(s)
-            >>> df = DataFrame('admissions_train')
-            >>> df2 = df.select(['gpa', 'masters'])
-
-            a) Save a teradataml DataFrame by using only a table name:
-               >>> df2.to_sql('my_tdml_table')
-
-            b) Save a teradataml DataFrame by using additional parameters:
-               >>> df2.to_sql(table_name = 'my_tdml_table', if_exists='append',
-                              primary_index = ['gpa'], temporary=False, schema_name='alice')
-
-            c) Alternatively, save a teradataml DataFrame by using copy_to_sql:
-               >>> copy_to_sql(df2, 'my_tdml_table_2')
-
-            d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
-               >>> copy_to_sql(df = df2, table_name = 'my_tdml_table_3', schema_name = 'alice',
-                               temporary = False, primary_index = None, if_exists = 'append',
-                               types = {'masters': VARCHAR, 'gpa': INTEGER})
-
-            e) Saving as a SET table:
-               >>> copy_to_sql(df = df2, table_name = 'my_tdml_set_table', schema_name = 'alice',
-                               temporary = False, primary_index = ['gpa'], if_exists = 'append',
-                               types = {'masters': VARCHAR, 'gpa': INTEGER}, set_table = True)
-
-         3. Saving a teradataml DataFrame as a PTI table:
-
-            >>> from teradataml.dataframe.dataframe import DataFrame
-            >>> from teradataml.dataframe.copy_to import copy_to_sql
-            >>> from teradataml.data.load_example_data import load_example_data
-
-            >>> load_example_data("sessionize", "sessionize_table")
-            >>> df3 = DataFrame('sessionize_table')
-
-            a) Using copy_to_sql:
-               >>> copy_to_sql(df3, "test_copyto_pti",
-                               timecode_column='clicktime',
-                               columns_list='event')
-
-            b) Alternatively, using DataFrame.to_sql:
-               >>> df3.to_sql(table_name = "test_copyto_pti_1",
-                              timecode_column='clicktime',
-                              columns_list='event')
-
-            c) Saving as a SET table:
-               >>> copy_to_sql(df3, "test_copyto_pti_2",
-                               timecode_column='clicktime',
-                               columns_list='event',
-                               set_table=True)
-
-     """
-     # Deriving global connection using get_connection().
-     con = get_connection()
-
-     try:
-         if con is None:
-             raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE), MessageCodes.CONNECTION_FAILURE)
-
-         # Check if the table to be created must be a Primary Time Index (PTI) table.
-         # If a user specifies the timecode_column parameter, an attempt to create
-         # a PTI will be made.
-         is_pti = False
-         if timecode_column is not None:
-             is_pti = True
-             if primary_index is not None:
-                 warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
-                                                    'primary_index',
-                                                    'timecode_column',
-                                                    'specified'))
-         else:
-             ignored = []
-             if timezero_date is not None: ignored.append('timezero_date')
-             if timebucket_duration is not None: ignored.append('timebucket_duration')
-             if sequence_column is not None: ignored.append('sequence_column')
-             if seq_max is not None: ignored.append('seq_max')
-             if columns_list is not None and (
-                     not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
-             if primary_time_index_name is not None: ignored.append('primary_time_index_name')
-             if len(ignored) > 0:
-                 warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
-                                                    ignored,
-                                                    'timecode_column',
-                                                    'missing'))
-
-         # Unset schema_name when temporary is True, since volatile tables always live in the user database.
-         if temporary is True:
-             if schema_name is not None:
-                 warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
-                                                    'schema_name',
-                                                    'temporary=True',
-                                                    'specified'))
-             schema_name = None
-
-         # Validate the DataFrame & related flags; proceed only when validation passes.
-         from teradataml.dataframe.data_transfer import _DataTransferUtils
-         dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name,
-                                     if_exists=if_exists, index=index, index_label=index_label,
-                                     primary_index=primary_index, temporary=temporary,
-                                     types=types, primary_time_index_name=primary_time_index_name,
-                                     timecode_column=timecode_column,
-                                     timebucket_duration=timebucket_duration,
-                                     timezero_date=timezero_date, columns_list=columns_list,
-                                     sequence_column=sequence_column, seq_max=seq_max,
-                                     set_table=set_table, api_name='copy_to',
-                                     chunksize=chunksize, match_column_order=match_column_order)
-
-         dt_obj._validate()
-
-         # If the table to be created must be a PTI table, then validate the additional parameters.
-         # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
-         # will be ignored - for example, primary_index.
-         if is_pti:
-             _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
-                                           timezero_date, primary_time_index_name, columns_list,
-                                           sequence_column, seq_max, types, index, index_label)
-
-         # A table cannot be a SET table and have NO PRIMARY INDEX.
-         if set_table and primary_index is None and timecode_column is None:
-             raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_NO_PI),
-                                       MessageCodes.SET_TABLE_NO_PI)
-
-         # Check if the destination table exists.
-         table_exists = dt_obj._table_exists(con)
-
-         # Raise an exception when the table exists and if_exists = 'fail'.
-         dt_obj._check_table_exists(is_table_exists=table_exists)
-
-         # Is the input DataFrame a Pandas DataFrame?
-         is_pandas_df = isinstance(df, pd.DataFrame)
-
-         # Let's also execute the node and set the table_name when df is a teradataml DataFrame.
-         if not is_pandas_df and df._table_name is None:
-             df._table_name = df_utils._execute_node_return_db_object_name(df._nodeid, df._metaexpr)
-
-         # Check whether a table name conflict is present.
-         is_conflict = _check_table_name_conflict(df, table_name) if isinstance(df, tdmldf.DataFrame) and \
-                                                                     if_exists.lower() == 'replace' else False
-
-         # Create a temporary table name when a table name conflict is present.
-         if is_conflict:
-             # Store the actual destination table name for later use.
-             dest_table_name = table_name
-             table_name = UtilFuncs._generate_temp_table_name(prefix=table_name,
-                                                              table_type=TeradataConstants.TERADATA_TABLE,
-                                                              quote=False)
-
-         # Let's create the SQLAlchemy table object to recreate the table.
-         if not table_exists or if_exists.lower() == 'replace':
-             if not is_pti:
-                 table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
-                                              types, None if not is_pandas_df else index,
-                                              None if not is_pandas_df else index_label)
-             else:
-                 table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
-                                                  primary_time_index_name, timecode_column, timezero_date,
-                                                  timebucket_duration, sequence_column, seq_max,
-                                                  columns_list, set_table, types,
-                                                  None if not is_pandas_df else index,
-                                                  None if not is_pandas_df else index_label)
-
-             if table is not None:
-                 # If the table needs to be replaced and there is no table name conflict,
-                 # let's drop the existing table first.
-                 if table_exists and not is_conflict:
-                     tbl_name = dt_obj._get_fully_qualified_table_name()
-                     UtilFuncs._drop_table(tbl_name)
-                 try:
-                     table.create(bind=get_context())
-                 except sqlachemyOperationalError as err:
-                     raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
-                                               '\n' + str(err),
-                                               MessageCodes.TABLE_OBJECT_CREATION_FAILED)
-             else:
-                 raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
-                                           MessageCodes.TABLE_OBJECT_CREATION_FAILED)
-
-         # Check column compatibility for insertion when the table exists and if_exists = 'append'.
-         if table_exists and if_exists.lower() == 'append':
-             UtilFuncs._get_warnings('set_table', set_table, 'if_exists', 'append')
-
-             table = UtilFuncs._get_sqlalchemy_table(table_name,
-                                                     schema_name=schema_name)
-
-             if table is not None:
-                 # ELE-2284
-                 # We are not considering types for 'append' mode, as it is a simple insert and no casting is applied.
-                 if is_pandas_df:
-                     cols = _extract_column_info(df, index=index, index_label=index_label)
-                 else:
-                     cols, _ = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
-                 if match_column_order:
-                     cols_compatible = _check_columns_insertion_compatible(table.c, cols, is_pandas_df,
-                                                                           is_pti, timecode_column, sequence_column)
-
-                     if not cols_compatible:
-                         raise TeradataMlException(Messages.get_message(MessageCodes.INSERTION_INCOMPATIBLE),
-                                                   MessageCodes.INSERTION_INCOMPATIBLE)
-
-         # df is a Pandas DataFrame object.
-         if isinstance(df, pd.DataFrame):
-             if not table_exists or if_exists.lower() == 'replace':
-                 try:
-                     # Support for saving the Pandas index/volatile tables is by manually inserting rows (batch) for now.
-                     if index or is_pti:
-                         _insert_from_dataframe(df, con, schema_name, table_name, index,
-                                                chunksize, is_pti, timecode_column,
-                                                sequence_column, match_column_order)
-
-                     # When the index isn't saved, and for non-PTI tables, to_sql insertion is used (batch).
-                     else:
-                         df.to_sql(table_name, get_context(), if_exists='append', index=False, index_label=None,
-                                   chunksize=chunksize, schema=schema_name)
-
-                 except sqlachemyOperationalError as err:
-                     if "Duplicate row error" in str(err):
-                         raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_DUPICATE_ROW,
-                                                                        table_name),
-                                                   MessageCodes.SET_TABLE_DUPICATE_ROW)
-                     else:
-                         raise
-
-             elif table_exists and if_exists.lower() == 'append':
-                 _insert_from_dataframe(df, con, schema_name, table_name, index,
-                                        chunksize, is_pti, timecode_column,
-                                        sequence_column, match_column_order)
-
-
-         # df is a teradataml DataFrame object (the to_sql wrapper is used).
-         elif isinstance(df, tdmldf.DataFrame):
-             df_column_list = [col.name for col in df._metaexpr.c]
-
-             if is_pti:
-                 # Reorder the column list to reposition the timecode and sequence columns.
-                 df_column_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
-
-             df_utils._insert_all_from_table(table_name, df._table_name, df_column_list, schema_name, temporary)
-
-         # When a table name conflict is present, delete the source table after creating the temporary table,
-         # then rename the temporary table to the destination table name.
-         if is_conflict and if_exists.lower() == 'replace':
-             tbl_name = dt_obj._get_fully_qualified_table_name()
-             UtilFuncs._drop_table(tbl_name)
-             _rename_table(table_name, dest_table_name)
-
-
-     except (TeradataMlException, ValueError, TypeError):
-         raise
-     except Exception as err:
-         raise TeradataMlException(Messages.get_message(MessageCodes.COPY_TO_SQL_FAIL) + str(err),
-                                   MessageCodes.COPY_TO_SQL_FAIL) from err
-
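[Editor's note] For orientation, a minimal sketch of the two save paths handled above: an initial create with an explicit types override, then an append relying on match_column_order=False. This is an illustrative assumption, not code from this release; the host, credentials, and table names are placeholders.

    import pandas as pd
    from teradataml import create_context, remove_context
    from teradataml.dataframe.copy_to import copy_to_sql
    from teradatasqlalchemy import BIGINT, VARCHAR

    # Placeholder connection details (assumed, not taken from this diff).
    create_context(host="<host>", username="<user>", password="<password>")

    sales = pd.DataFrame({"txn_id": [1, 2, 3], "region": ["N", "S", "W"]})

    # Create (or replace) the table; 'types' overrides the default dtype
    # mapping documented in the docstring above for the listed columns.
    copy_to_sql(df=sales, table_name="sales_stage", if_exists="replace",
                types={"txn_id": BIGINT, "region": VARCHAR(10)})

    # Append rows whose columns arrive in a different order; per the
    # match_column_order description above, the positional compatibility
    # check is skipped when it is set to False.
    more = pd.DataFrame({"region": ["E"], "txn_id": [4]})
    copy_to_sql(df=more, table_name="sales_stage", if_exists="append",
                match_column_order=False)

    remove_context()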
- def _check_table_name_conflict(df, table_name):
-     """
-     Check whether the destination "table_name" matches any of the teradataml DataFrame's parent nodes.
-     This function traverses the DAG from the child node to the root node and checks for a table name conflict.
-
-     PARAMETERS:
-         df:
-             Required Argument.
-             Specifies the teradataml DataFrame object to be checked.
-             Types: teradataml.dataframe.dataframe.DataFrame
-
-         table_name:
-             Required Argument.
-             Specifies the name of the table to be created in Vantage.
-             Types: String
-
-     RETURNS:
-         A boolean value representing the presence of a conflict.
-
-     RAISES:
-         None
-
-     EXAMPLES:
-         >>> df = DataFrame("sales")
-         >>> table_name = "destination_table"
-         >>> _check_table_name_conflict(df, table_name)
-     """
-     aed_obj = AedUtils()
-     # Check if the parent node count is greater than 0.
-     if aed_obj._aed_get_parent_node_count(df._nodeid) > 0:
-         # Let's check whether "table_name" matches any of the parent nodes' table names.
-         # Get the current table node id.
-         node_id = df._nodeid
-         while node_id:
-
-             # Get the parent node id using the current table node id.
-             parent_node_id = aed_obj._aed_get_parent_nodeids(node_id)
-
-             if parent_node_id:
-                 # Check whether "table_name" matches the parent "table_name".
-                 # If the table name matches, then return 'True'.
-                 # Otherwise, traverse the graph from the current node to the topmost root node.
-                 if table_name in aed_obj._aed_get_source_tablename(parent_node_id[0]):
-                     return True
-                 else:
-                     node_id = parent_node_id[0]
-             else:
-                 # When parent_node_id is empty, return 'False'.
-                 return False
-     return False
-
-
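[Editor's note] A sketch of the self-overwrite case this helper detects, assuming an active context and an existing "sales" table (a hypothetical name): when the replace destination is also an ancestor in the DataFrame's DAG, copy_to_sql above takes the temp-table-then-rename path rather than dropping the source of the very rows being copied.

    from teradataml import DataFrame
    from teradataml.dataframe.copy_to import copy_to_sql

    df = DataFrame("sales")        # DAG rooted at table "sales" (assumed to exist)
    subset = df[df.Jan > 150]      # derived node; "sales" is still its ancestor

    # Destination name collides with the ancestor table, so the code above
    # writes to a generated temporary table, drops "sales", then renames.
    copy_to_sql(subset, "sales", if_exists="replace")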
- def _get_sqlalchemy_table_from_tdmldf(df, meta):
-     """
-     This is an internal function used to generate an SQLAlchemy Table
-     object for the underlying table/view of a DataFrame.
-
-     PARAMETERS:
-         df:
-             The teradataml DataFrame to generate the SQLAlchemy.Table object for.
-
-         meta:
-             The SQLAlchemy.MetaData object.
-
-     RETURNS:
-         SQLAlchemy.Table
-
-     RAISES:
-         None
-
-     EXAMPLES:
-         >>> con = get_connection()
-         >>> df = DataFrame('admissions_train')
-         >>> meta = sqlalchemy.MetaData()
-         >>> table = _get_sqlalchemy_table_from_tdmldf(df, meta)
-
-     """
-     con = get_connection()
-     db_schema = UtilFuncs._extract_db_name(df._table_name)
-     db_table_name = UtilFuncs._extract_table_name(df._table_name)
-
-     return Table(db_table_name, meta, schema=db_schema, autoload_with=get_context())
-
-
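[Editor's note] A generic, self-contained illustration of the reflection mechanism used above (standalone SQLAlchemy against in-memory SQLite, so it runs without Vantage; the table name is made up):

    from sqlalchemy import MetaData, Table, create_engine, text

    engine = create_engine("sqlite:///:memory:")
    with engine.begin() as conn:
        conn.execute(text("CREATE TABLE admissions_train (id INTEGER, gpa FLOAT)"))

    meta = MetaData()
    # autoload_with reflects column metadata from the live connection,
    # the same mechanism _get_sqlalchemy_table_from_tdmldf relies on.
    table = Table("admissions_train", meta, autoload_with=engine)
    print([c.name for c in table.columns])   # ['id', 'gpa']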
- def _get_index_labels(df, index_label):
-     """
-     Internal function to construct a list of labels for the indices to be saved from the Pandas DataFrame,
-     based on user input and information from the DataFrame.
-
-     PARAMETERS:
-         df:
-             The Pandas input DataFrame.
-
-         index_label:
-             The user-provided label(s) for the indices.
-
-     RAISES:
-         None
-
-     RETURNS:
-         A list of Strings corresponding to the labels for the indices to add as columns,
-         along with a list of the corresponding index level dtypes.
-
-     EXAMPLES:
-         _get_index_labels(df, index_label)
-     """
-     default_index_label = 'index_label'
-     default_level_prefix = 'level_'
-     level_cnt = 0
-
-     is_multi_index = isinstance(df.index, pd.MultiIndex)
-     ind_types = [level.dtype for level in df.index.levels] if is_multi_index else [df.index.dtype]
-
-     ind_names = []
-     if index_label:
-         ind_names = [index_label] if isinstance(index_label, str) else index_label
-     else:
-         for name in df.index.names:
-             if name not in ('', None):
-                 ind_names.append(name)
-             else:
-                 if is_multi_index:
-                     ind_names.append(default_level_prefix + str(level_cnt))
-                     level_cnt = level_cnt + 1
-                 else:
-                     df_columns = _get_pd_df_column_names(df)
-                     label = default_level_prefix + str(level_cnt) if default_index_label in df_columns else default_index_label
-                     ind_names.append(label)
-
-     return ind_names, ind_types
-
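[Editor's note] The fallback naming above can be previewed with plain pandas; a small self-contained example with made-up data, showing the inputs the helper works from for an unnamed MultiIndex (for which it would derive 'level_0' and 'level_1'):

    import pandas as pd

    df = pd.DataFrame({"gpa": [3.5, 3.9]},
                      index=pd.MultiIndex.from_tuples([(1, "yes"), (2, "no")]))

    # Unnamed levels trigger the default 'level_<n>' labels above.
    print(df.index.names)                           # FrozenList([None, None])
    print([lvl.dtype for lvl in df.index.levels])   # [dtype('int64'), dtype('O')]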
724
- def _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
725
- timezero_date, primary_time_index_name, columns_list,
726
- sequence_column, seq_max, types, index, index_label):
727
- """
728
- This is an internal function used to validate the PTI part of copy request.
729
- Dataframe, connection & related parameters are checked.
730
- Saving to Vantage is proceeded to only when validation returns True.
731
-
732
- PARAMETERS:
733
- df:
734
- The DataFrame (Pandas or teradataml) object to be saved.
735
-
736
- timecode_column:
737
- The column in the DataFrame that reflects the form of the timestamp
738
- data in the time series.
739
- Type: String
740
-
741
- timebucket_duration:
742
- A duration that serves to break up the time continuum in
743
- the time series data into discrete groups or buckets.
744
- Type: String
745
-
746
- timezero_date:
747
- Specifies the earliest time series data that the PTI table will accept.
748
- Type: String
749
-
750
- primary_time_index_name:
751
- A name for the Primary Time Index (PTI).
752
- Type: String
753
-
754
- columns_list:
755
- A list of one or more PTI table column names.
756
- Type: String or list of Strings
757
-
758
- sequence_column:
759
- Specifies a column of type Integer with sequences implying that the
760
- time series data readings are not unique.
761
- If not specified, the time series data are assumed to be unique in time.
762
- Type: String
763
-
764
- seq_max:
765
- Specifies the maximum number of sensor data rows that can have the
766
- same timestamp. Can be used when 'sequenced' is True.
767
- Accepted range: 1 - 2147483647.
768
- Type: int
769
-
770
- types:
771
- Dictionary specifying column-name to teradatasqlalchemy type-mapping.
772
-
773
- index:
774
- Flag specifying whether to write Pandas DataFrame index as a column or not.
775
- Type: bool
776
-
777
- index_label:
778
- Column label for index column(s).
779
- Type: String
780
-
781
- RETURNS:
782
- True, when all parameters are valid.
783
-
784
- RAISES:
785
- TeradataMlException, when parameter validation fails.
786
-
787
- EXAMPLES:
788
- _validate_pti_copy_parameters(df = my_df, timecode_column = 'ts', timbucket_duration = 'HOURS(2)')
789
- """
790
- if isinstance(df, pd.DataFrame):
791
- df_columns = _get_pd_df_column_names(df)
792
- else:
793
- df_columns = [col.name for col in df._metaexpr.c]
794
-
795
- awu = AnalyticsWrapperUtils()
796
- awu_matrix = []
797
-
798
- # The arguments added to awu_martix are:
799
- # arg_name, arg, is_optional, acceptable types
800
- # The value for is_optional is set to False when the argument
801
- # a) is a required argument
802
- # b) is not allowed to be None, even if it is optional
803
- awu_matrix.append(['timecode_column', timecode_column, False, (str)])
804
- awu_matrix.append(['columns_list', columns_list, True, (str, list)])
805
- awu_matrix.append(['timezero_date', timezero_date, True, (str)])
806
- awu_matrix.append(['timebucket_duration', timebucket_duration, True, (str)])
807
- awu_matrix.append(['primary_time_index_name', primary_time_index_name, True, (str)])
808
- awu_matrix.append(['sequence_column', sequence_column, True, (str)])
809
- awu_matrix.append(['seq_max', seq_max, True, (int)])
810
-
811
- # Validate types
812
- awu._validate_argument_types(awu_matrix)
813
-
814
- # Validate arg emtpy
815
- awu._validate_input_columns_not_empty(timecode_column, 'timecode_column')
816
- awu._validate_input_columns_not_empty(columns_list, 'columns_list')
817
- awu._validate_input_columns_not_empty(timezero_date, 'timezero_date')
818
- awu._validate_input_columns_not_empty(timebucket_duration, 'timebucket_duration')
819
- awu._validate_input_columns_not_empty(sequence_column, 'sequence_column')
820
-
821
- # Validate all the required arguments and optional arguments when not none
822
- # First the timecode_column
823
- _validate_column_in_list_of_columns('df', df_columns, timecode_column, 'timecode_column')
824
- # Check the type of timecode_column
825
- _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES.value,
826
- types, index, index_label)
827
-
828
- # timezero date
829
- _validate_timezero_date(timezero_date)
830
-
831
- # timebucket duration
832
- _Validators._validate_timebucket_duration(timebucket_duration)
833
-
834
- # Validate sequence_column
835
- if sequence_column is not None:
836
- _validate_column_in_list_of_columns('df', df_columns, sequence_column, 'sequence_column')
837
- # Check the type of sequence_column
838
- _validate_column_type(df, sequence_column, 'sequence_column',
839
- PTITableConstants.VALID_SEQUENCE_COL_DATATYPES.value, types, index, index_label)
840
-
841
- # Validate seq_max
842
- if seq_max is not None and (seq_max < 1 or seq_max > 2147483647):
843
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(seq_max, 'seq_max', '1 <= integer <= 2147483647'),
844
- MessageCodes.INVALID_ARG_VALUE)
845
-
846
- # Validate cols_list
847
- _validate_columns_list('df', df_columns, columns_list)
848
- if isinstance(columns_list, str):
849
- columns_list = [columns_list]
850
-
851
- # Either one or both of timebucket_duration and columns_list must be specified
852
- if timebucket_duration is None and (columns_list is None or len(columns_list) == 0):
853
- raise TeradataMlException(
854
- Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, 'timebucket_duration', 'columns_list'),
855
- MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
856
-
857
-
858
- def _validate_columns_list(df, df_columns, columns_list):
859
- """
860
- Internal function to validate columns list specified when creating a
861
- Primary Time Index (PTI) table.
862
-
863
- PARAMETERS:
864
- df:
865
- Name of the DataFrame to which the column being validated
866
- does or should belong.
867
-
868
- df_columns:
869
- List of columns in the DataFrame.
870
-
871
- columns_list:
872
- The column or list of columns.
873
- Type: String or list of Strings
874
-
875
- RETURNS:
876
- True if the column or list of columns is valid.
877
-
878
- RAISES:
879
- Raise TeradataMlException on validation failure.
880
- """
881
- if columns_list is None:
882
- return True
883
-
884
- # Validate DF has columns
885
- if isinstance(columns_list, str):
886
- columns_list = [columns_list]
887
-
888
- for col in columns_list:
889
- _validate_column_in_list_of_columns(df, df_columns, col, 'columns_list')
890
-
891
- return True
892
-
893
-
894
- def _validate_column_in_list_of_columns(df, df_columns, col, col_arg):
895
- """
896
- Internal function to validate the arguments used to specify
897
- a column name in DataFrame.
898
-
899
- PARAMETERS:
900
- df:
901
- Name of the DataFrame to which the column being validated
902
- does or should belong.
903
-
904
- df_columns:
905
- List of columns in the DataFrame.
906
-
907
- col:
908
- Column to be validated.
909
-
910
- col_arg:
911
- Name of argument used to specify the column name.
912
-
913
- RETURNS:
914
- True, if the column name is valid.
915
-
916
- RAISES:
917
- TeradataMlException if the column name is invalid.
918
- """
919
- if col not in df_columns:
920
- raise TeradataMlException(
921
- Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(col,
922
- col_arg,
923
- df,
924
- 'DataFrame'),
925
- MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND)
926
-
927
- return True
928
-
929
-
930
- def _validate_column_type(df, col, col_arg, expected_types, types = None, index = False, index_label = None):
931
- """
932
- Internal function to validate the type of an input DataFrame column against
933
- a list of expected types.
934
-
935
- PARAMETERS:
936
- df:
937
- Input DataFrame (Pandas or teradataml) which has the column to be tested
938
- for type.
939
-
940
- col:
941
- The column in the input DataFrame to be tested for type.
942
-
943
- col_arg:
944
- The name of the argument used to pass the column name.
945
-
946
- expected_types:
947
- Specifies a list of teradatasqlalchemy datatypes that the column is
948
- expected to be one of.
949
-
950
- types:
951
- Dictionary specifying column-name to teradatasqlalchemy type-mapping.
952
-
953
- RETURNS:
954
- True, when the column is of an expected type.
955
-
956
- RAISES:
957
- TeradataMlException, when the column is not one of the expected types.
958
-
959
- EXAMPLES:
960
- _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES, types)
961
- """
962
- # Check if the column is being cast to a valid type
963
- if types is not None and col in types:
964
- if not any(isinstance(types[col], expected_type) for expected_type in expected_types):
965
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
966
- format(col_arg, types[col], ' or '.join(expected_type.__visit_name__
967
- for expected_type in expected_types)),
968
- MessageCodes.INVALID_COLUMN_TYPE)
969
- # Else we need to copy without any casting
970
- elif isinstance(df, pd.DataFrame):
971
- t = _get_sqlalchemy_mapping(str(df.dtypes[col]))
972
- if t not in expected_types:
973
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
974
- format(col_arg, t, ' or '.join(expected_type.__visit_name__
975
- for expected_type in expected_types)),
976
- MessageCodes.INVALID_COLUMN_TYPE)
977
- elif not any(isinstance(df[col].type, t) for t in expected_types):
978
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
979
- format(col_arg, df[col].type, ' or '.join(expected_type.__visit_name__
980
- for expected_type in expected_types)),
981
- MessageCodes.INVALID_COLUMN_TYPE)
982
-
983
- return True
984
-
985
-
986
- def _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table, types, index=None,
987
- index_label=None):
988
- """
989
- This is an internal function used to construct a SQLAlchemy Table Object.
990
- This function checks appropriate flags and supports creation of Teradata
991
- specific Table constructs such as Volatile/Primary Index tables.
992
-
993
-
994
- PARAMETERS:
995
- df:
996
- The teradataml or Pandas DataFrame object to be saved.
997
-
998
- table_name:
999
- Name of SQL table.
1000
-
1001
- con:
1002
- A SQLAlchemy connectable (engine/connection) object
1003
-
1004
- primary_index:
1005
- Creates Teradata Table(s) with Primary index column if specified.
1006
-
1007
- temporary:
1008
- Flag specifying whether SQL table to be created is Volatile or not.
1009
-
1010
- schema_name:
1011
- Specifies the name of the SQL schema in the database to write to.
1012
-
1013
- set_table:
1014
- A flag specifying whether to create a SET table or a MULTISET table.
1015
- When True, an attempt to create a SET table is made.
1016
- When False, an attempt to create a MULTISET table is made.
1017
-
1018
- types:
1019
- Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
1020
-
1021
- index:
1022
- Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
1023
-
1024
- index_label:
1025
- Column label(s) for index column(s).
1026
-
1027
- RETURNS:
1028
- SQLAlchemy Table
1029
-
1030
- RAISES:
1031
- N/A
1032
-
1033
- EXAMPLES:
1034
- _create_table_object(df = my_df, table_name = 'test_table', con = tdconnection, primary_index = None,
1035
- temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
1036
- _create_table_object(df = csv_filepath, table_name = 'test_table', con = tdconnection, primary_index = None,
1037
- temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
1038
- """
1039
- # Dictionary to append special flags, can be extended to add Fallback, Journalling, Log etc.
1040
- post_params = {}
1041
- prefix = []
1042
- pti = post(opts=post_params)
1043
-
1044
- if temporary is True:
1045
- pti = pti.on_commit(option='preserve')
1046
- prefix.append('VOLATILE')
1047
-
1048
- if not set_table:
1049
- prefix.append('multiset')
1050
- else:
1051
- prefix.append('set')
1052
-
1053
- meta = MetaData()
1054
- meta.bind = con
1055
-
1056
- if isinstance(df, pd.DataFrame):
1057
- col_names, col_types = _extract_column_info(df, types, index, index_label)
1058
- elif isinstance(df, str):
1059
- col_names, col_types = _extract_column_info(df, types)
1060
- else:
1061
- col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
1062
- if types is not None:
1063
- # Use the user-specified type when provided, defaulting to the existing type when only partial types are provided.
1064
- col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
1065
-
1066
- if primary_index is not None:
1067
- if isinstance(primary_index, list):
1068
- pti = pti.primary_index(unique=False, cols=primary_index)
1069
- elif isinstance(primary_index, str):
1070
- pti = pti.primary_index(unique=False, cols=[primary_index])
1071
- else:
1072
- pti = pti.no_primary_index()
1073
-
1074
- # Create default Table construct with parameter dictionary
1075
- table = Table(table_name, meta,
1076
- *(Column(col_name, col_type)
1077
- for col_name, col_type in
1078
- zip(col_names, col_types)),
1079
- teradatasql_post_create=pti,
1080
- prefixes=prefix,
1081
- schema=schema_name
1082
- )
1083
-
1084
- return table
1085
-
1086
-
1087
- def _create_pti_table_object(df, con, table_name, schema_name, temporary, primary_time_index_name,
1088
- timecode_column, timezero_date, timebucket_duration,
1089
- sequence_column, seq_max, columns_list, set_table, types, index=None, index_label=None):
1090
- """
1091
- This is an internal function used to construct a SQLAlchemy Table Object.
1092
- This function checks appropriate flags and supports creation of Teradata
1093
- specific Table constructs such as Volatile and Primary Time Index tables.
1094
-
1095
- PARAMETERS:
1096
- df:
1097
- The teradataml or Pandas DataFrame object to be saved.
1098
-
1099
- con:
1100
- A SQLAlchemy connectable (engine/connection) object
1101
-
1102
- table_name:
1103
- Name of SQL table.
1104
-
1105
- schema_name:
1106
- Specifies the name of the SQL schema in the database to write to.
1107
-
1108
- temporary:
1109
- Flag specifying whether SQL table to be created is Volatile or not.
1110
-
1111
- primary_time_index_name:
1112
- A name for the Primary Time Index (PTI).
1113
-
1114
- timecode_column:
1115
- The column in the DataFrame that reflects the form of the timestamp
1116
- data in the time series.
1117
-
1118
- timezero_date:
1119
- Specifies the earliest time series data that the PTI table will accept.
1120
-
1121
- timebucket_duration:
1122
- A duration that serves to break up the time continuum in
1123
- the time series data into discrete groups or buckets.
1124
-
1125
- sequence_column:
1126
- Specifies a column with sequences implying that time series data
1127
- readings are not unique. If not specified, the time series data are
1128
- assumed to be unique.
1129
-
1130
- seq_max:
1131
- Specifies the maximum number of sensor data rows that can have the
1132
- same timestamp. Can be used when 'sequenced' is True.
1133
-
1134
- columns_list:
1135
- A list of one or more PTI table column names.
1136
-
1137
- set_table:
1138
- A flag specifying whether to create a SET table or a MULTISET table.
1139
- When True, an attempt to create a SET table is made.
1140
- When False, an attempt to create a MULTISET table is made.
1141
-
1142
- types:
1143
- Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
1144
-
1145
- index:
1146
- Flag specifying whether to write Pandas DataFrame index as a column or not.
1147
-
1148
- index_label:
1149
- Column label for index column(s).
1150
-
1151
- RETURNS:
1152
- SQLAlchemy Table
1153
-
1154
- RAISES:
1155
- N/A
1156
-
1157
- EXAMPLES:
1158
- _create_pti_table_object(df = my_df, table_name = 'test_table', con = tdconnection,
1159
- timecode_column = 'ts', columns_list = ['user_id', 'location'])
1160
-
1161
- """
1162
- meta = MetaData()
1163
-
1164
- if isinstance(df, pd.DataFrame):
1165
- col_names, col_types = _extract_column_info(df, types, index, index_label)
1166
- timecode_datatype = col_types[col_names.index(timecode_column)]()
1167
- else:
1168
- col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
1169
- if types is not None:
1170
- # Use the user-specified type when provided, defaulting to the existing type when only partial types are provided
1171
- col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
1172
- timecode_datatype = df[timecode_column].type
1173
-
1174
- # Remove timecode and sequence column from col_names and col_types
1175
- # since the required columns will be created automatically
1176
- if timecode_column in col_names:
1177
- ind = col_names.index(timecode_column)
1178
- col_names.pop(ind)
1179
- col_types.pop(ind)
1180
-
1181
- if sequence_column is not None and sequence_column in col_names:
1182
- ind = col_names.index(sequence_column)
1183
- col_names.pop(ind)
1184
- col_types.pop(ind)
1185
-
1186
- # Dictionary to append special flags, can be extended to add Fallback, Journalling, Log etc.
1187
- post_params = {}
1188
- prefix = []
1189
- pti = post(opts=post_params)
1190
-
1191
- # Create Table object with appropriate Primary Time Index/Prefix for volatile
1192
- if temporary:
1193
- pti = pti.on_commit(option='preserve')
1194
- prefix.append('VOLATILE')
1195
-
1196
- if not set_table:
1197
- prefix.append('multiset')
1198
- else:
1199
- prefix.append('set')
1200
-
1201
- pti = pti.primary_time_index(timecode_datatype,
1202
- name=primary_time_index_name,
1203
- timezero_date=timezero_date,
1204
- timebucket_duration=timebucket_duration,
1205
- sequenced=True if sequence_column is not None else False,
1206
- seq_max=seq_max,
1207
- cols=columns_list)
1208
-
1209
- table = Table(table_name, meta,
1210
- *(Column(col_name, col_type)
1211
- for col_name, col_type in
1212
- zip(col_names, col_types)),
1213
- teradatasql_post_create=pti,
1214
- prefixes=prefix,
1215
- schema=schema_name
1216
- )
1217
-
1218
- return table
1219
-
1220
-
1221
- def _rename_column(col_names, search_for, rename_to):
1222
- """
1223
- Internal function to rename a column in a list of columns of a Pandas DataFrame.
1224
-
1225
- PARAMETERS:
1226
- col_names:
1227
- Required Argument.
1228
- The list of column names of the Pandas DataFrame.
1229
-
1230
- search_for:
1231
- Required Argument.
1232
- The column name that needs to be changed/renamed.
1233
-
1234
- rename_to:
1235
- Required Argument.
1236
- The column name that the 'search_for' column needs to be replaced with.
1237
-
1238
- RETURNS:
1239
- The list of column names with the requested column renamed.
1240
-
1241
- EXAMPLES:
1242
- cols = _rename_column(cols, 'col_1', 'new_col_1')
1243
- """
1244
- ind = col_names.index(search_for)
1245
- col_names.pop(ind)
1246
- col_names.insert(ind, rename_to)
1247
-
1248
- return col_names
1249
-
1250
-
1251
- def _rename_to_pti_columns(col_names, timecode_column, sequence_column,
1252
- timecode_column_index=None, sequence_column_index=None):
1253
- """
1254
- Internal function to generate a list of renamed columns of a Pandas DataFrame to match that of the PTI table column names
1255
- in Vantage, or revert any such changes made.
1256
-
1257
- PARAMETERS:
1258
- col_names:
1259
- The list of column names of the Pandas DataFrame.
1260
-
1261
- timecode_column:
1262
- The column name that reflects the timecode column in the PTI table.
1263
-
1264
- sequence_column:
1265
- The column name that reflects the sequence column in the PTI table.
1266
-
1267
- timecode_column_index:
1268
- The index of the timecode column. When specified, it indicates that a reverse renaming operation is to be
1269
- performed.
1270
-
1271
- sequence_column_index:
1272
- The index of the sequence column. When specified, it indicates that a reverse renaming operation is to be
1273
- performed.
1274
-
1275
- RETURNS:
1276
- A list of renamed PTI related columns.
1277
-
1278
- EXAMPLES:
1279
- cols = _rename_to_pti_columns(cols, timecode_column, sequence_column, timecode_column_index=0, sequence_column_index=1)
1280
- cols = _rename_to_pti_columns(cols, timecode_column, sequence_column)
1281
- """
1282
- # Rename the timecode_column to what it is in Vantage
1283
- if timecode_column_index is not None:
1284
- col_names = _rename_column(col_names, PTITableConstants.TD_TIMECODE.value, timecode_column)
1285
- else:
1286
- col_names = _rename_column(col_names, timecode_column, PTITableConstants.TD_TIMECODE.value)
1287
-
1288
- # Rename the sequence_column to what it is in Vantage
1289
- if sequence_column is not None:
1290
- if sequence_column_index is not None:
1291
- col_names = _rename_column(col_names, PTITableConstants.TD_SEQNO.value, sequence_column)
1292
- else:
1293
- col_names = _rename_column(col_names, sequence_column, PTITableConstants.TD_SEQNO.value)
1294
-
1295
- return col_names
1296
-
1297
-
1298
- def _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column, df_col_type_list = None):
1299
- """
1300
- Internal function to reorder the list of columns used to construct the 'INSERT INTO'
1301
- statement as required when the target table is a PTI table.
1302
-
1303
- PARAMETERS:
1304
- df_column_list:
1305
- A list of column names for the columns in the DataFrame.
1306
-
1307
- timecode_column:
1308
- The timecode column, which should be moved to the first position.
1309
-
1310
- sequence_column:
1311
- The sequence column, which should be moved to the second position.
1312
-
1313
- df_col_type_list:
1314
- Optional list containing the types of the columns; when specified, it is
1315
- reordered to match the reordering of df_column_list.
1316
-
1317
- RETURNS:
1318
- A reordered list of column names for the columns in the DataFrame.
1319
- If the optional types list is also specified, then a tuple of the reordered list of
1320
- column names and the list of the column types.
1321
-
1322
- EXAMPLE:
1323
- new_colname_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
1324
- new_colname_list, new_type_list = _reorder_insert_list_for_pti(df_column_list, timecode_column,
1325
- sequence_column, df_col_type_list)
1326
- """
1327
- # Reposition timecode (to the first) and sequence column (to the second)
1328
- # in df_column_list
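- # For example (illustrative): ['a', 'ts', 'seq'] with timecode_column='ts'
- # and sequence_column='seq' becomes ['ts', 'seq', 'a'].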
1329
- timecode_column_index = df_column_list.index(timecode_column)
1330
- df_column_list.insert(0, df_column_list.pop(timecode_column_index))
1331
- if df_col_type_list is not None:
1332
- df_col_type_list.insert(0, df_col_type_list.pop(timecode_column_index))
1333
-
1334
- if sequence_column is not None:
1335
- sequence_column_index = df_column_list.index(sequence_column)
1336
- df_column_list.insert(1, df_column_list.pop(sequence_column_index))
1337
- if df_col_type_list is not None:
1338
- df_col_type_list.insert(1, df_col_type_list.pop(sequence_column_index))
1339
-
1340
- if df_col_type_list is not None:
1341
- return df_column_list, df_col_type_list
1342
- else:
1343
- return df_column_list
1344
-
1345
-
1346
- def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_pandas_df=False,
1347
- is_pti=False, timecode_column=None, sequence_column=None):
1348
- """
1349
- Internal function used to extract column information from two lists of SQLAlchemy ColumnExpression objects;
1350
- and check whether the number of columns and their names match to determine table insertion compatibility.
1351
-
1352
- PARAMETERS:
1353
- table1_col_object:
1354
- Specifies a list/collection of SQLAlchemy ColumnExpression Objects for first table.
1355
-
1356
- table2_cols:
1357
- Specifies a list of column names for second table (teradataml DataFrame).
1358
-
1359
- is_pandas_df:
1360
- Flag specifying whether the table objects to check are pandas DataFrames or not
1361
- Default: False
1362
- Note: When this flag is True, table2_cols is passed as a tuple object of
1363
- ([column_names], [column_types])
1364
-
1365
- is_pti:
1366
- Boolean flag indicating if the target table is a PTI table.
1367
-
1368
- timecode_column:
1369
- timecode_column required to order the select expression for the insert.
1370
- It should be the first column in the select expression.
1371
-
1372
- sequence_column:
1373
- sequence_column required to order the select expression for the insert.
1374
- It should be the second column in the select expression.
1375
-
1376
-
1377
- RETURNS:
1378
- a) True, when insertion compatible (number of columns and their names match)
1379
- b) False, otherwise
1380
-
1381
- RAISES:
1382
- N/A
1383
-
1384
- EXAMPLES:
1385
- _check_columns_insertion_compatible(table1.c, ['col1', 'col2'], False)
1386
- _check_columns_insertion_compatible(table1.c, (['col1', 'col2'], [int, str]), True, True, 'ts', 'seq')
1387
-
1388
- """
1389
- table1_col_names, _ = UtilFuncs._extract_table_object_column_info(table1_col_object)
1390
- table2_col_names = table2_cols[0] if is_pandas_df else table2_cols
1391
-
1392
- # Check for number of columns
1393
- if len(table1_col_names) != len(table2_col_names):
1394
- return False
1395
-
1396
- if is_pti is True:
1397
- # Reposition timecode (to the first) and sequence column (to the second)
1398
- # with their names as generated by the database, in col_name since that
1399
- # is the default position of the columns.
1400
- table2_col_names = _reorder_insert_list_for_pti(table2_col_names, timecode_column, sequence_column)
1401
- table2_col_names = _rename_to_pti_columns(table2_col_names, timecode_column, sequence_column)
1402
-
1403
- # Check for the column names
1404
- for i in range(len(table1_col_names)):
1405
- if table1_col_names[i] != table2_col_names[i]:
1406
- return False
1407
-
1408
- # Number of columns and their names in both List of ColumnExpressions match
1409
- return True
1410
-
1411
-
1412
- def _extract_column_info(df, types = None, index = False, index_label = None):
1413
- """
1414
- This is an internal function used to extract column information for a DF,
1415
- and map to user-specified teradatasqlalchemy types, if specified,
1416
- for Table creation.
1417
-
1418
- PARAMETERS:
1419
- df:
1420
- The Pandas DataFrame object to be saved.
1421
-
1422
- types:
1423
- A python dictionary with column names and required types as key-value pairs.
1424
-
1425
- index:
1426
- Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
1427
-
1428
- index_label:
1429
- Column label(s) for index column(s).
1430
-
1431
- RETURNS:
1432
- A tuple with the following elements:
1433
- a) List of DataFrame Column names
1434
- b) List of equivalent teradatasqlalchemy column types
1435
-
1436
- RAISES:
1437
- None
1438
-
1439
- EXAMPLES:
1440
- _extract_column_info(df = my_df)
1441
- _extract_column_info(df = my_df, types = {'id_col': INTEGER})
1442
-
1443
- """
1444
- if isinstance(df, str):
1445
- return list(types.keys()), list(types.values())
1446
-
1447
- col_names = _get_pd_df_column_names(df)
1448
-
1449
- # If the datatype is not specified, check whether the dtype is datetime64 with a timezone;
1451
- # if so, map it to TIMESTAMP(timezone=True), else map it according to the default mapping.
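- # For example, dtype 'datetime64[ns, UTC]' maps to TIMESTAMP(timezone=True),
- # while 'int64' maps to BIGINT (see _get_all_sqlalchemy_mappings).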
1451
- col_types = [types.get(col_name) if types and col_name in types else
1452
- TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
1453
- and (df[col_name].dt.tz is not None)
1454
- else _get_sqlalchemy_mapping(str(df.dtypes[key]))
1455
- for key, col_name in enumerate(list(df.columns))]
1456
-
1457
- ind_names = []
1458
- ind_types = []
1459
- if index:
1460
- ind_names, ind_types = _get_index_labels(df, index_label)
1461
- ind_types = [types.get(ind_name) if types and ind_name in types
1462
- else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
1463
- and (df[ind_name].dt.tz is not None)
1464
- else _get_sqlalchemy_mapping(str(ind_types[key]))
1465
- for key, ind_name in enumerate(ind_names)]
1466
-
1467
- return col_names + ind_names, col_types + ind_types
1468
-
1469
- def _insert_from_dataframe(df, con, schema_name, table_name, index, chunksize,
1470
- is_pti=False, timecode_column=None, sequence_column=None,
1471
- match_column_order=True):
1472
- """
1473
- This is an internal function used to sequentially extract column info from DF,
1474
- iterate rows, and insert rows manually.
1475
- Used for Insertions to Temporary Tables & Tables with Pandas index.
1476
-
1477
- This uses DBAPI's executemany(), which is a batch insertion method.
1478
-
1479
- PARAMETERS:
1480
- df:
1481
- The Pandas DataFrame object to be saved.
1482
-
1483
- con:
1484
- A SQLAlchemy connectable (engine/connection) object
1485
-
1486
- schema_name:
1487
- Name of the schema.
1488
-
1489
- table_name:
1490
- Name of the table.
1491
-
1492
- index:
1493
- Flag specifying whether to write Pandas DataFrame index as a column or not.
1494
-
1495
- chunksize:
1496
- Specifies the number of rows to be loaded in a batch.
1497
- Note:
1498
- This argument is used only when argument "df" is a pandas DataFrame.
1499
-
1500
- is_pti:
1501
- Boolean flag indicating if the table should be a PTI table.
1502
-
1503
- timecode_column:
1504
- timecode_column required to order the select expression for the insert.
1505
- It should be the first column in the select expression.
1506
-
1507
- sequence_column:
1508
- sequence_column required to order the select expression for the insert.
1509
- It should be the second column in the select expression.
1510
-
1511
- match_column_order:
1512
- Specifies whether the column order of the df to be loaded matches the
1513
- column order of the existing table or not.
1514
-
1515
- RETURNS:
1516
- N/A
1517
-
1518
- RAISES:
1519
- N/A
1520
-
1521
- EXAMPLES:
1522
- _insert_from_dataframe(df = my_df, con = tdconnection, schema_name = None, table_name = 'test_table',
1523
- index = True, chunksize = 16383)
1524
- """
1525
- col_names = _get_pd_df_column_names(df)
1526
-
1527
- # Quoted, schema-qualified table name
1528
- table = '"{}"'.format(table_name)
1529
- if schema_name is not None:
1530
- table = '"{}".{}'.format(schema_name, table_name)
1531
-
1532
- try:
1533
-
1534
- if is_pti:
1535
- # This is for non-index columns.
1536
- col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)
1537
-
1538
- is_multi_index = isinstance(df.index, pd.MultiIndex)
1539
-
1540
- insert_list = []
1541
-
1542
- if not match_column_order:
1543
- ins = "INSERT INTO {} {} VALUES {};".format(
1544
- table,
1545
- '(' + ', '.join(col_names) + ')',
1546
- '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
1547
- if index is True else len(col_names))]) + ')')
1548
- else:
1549
- ins = "INSERT INTO {} VALUES {};".format(
1550
- table,
1551
- '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
1552
- if index is True else len(col_names))]) + ')')
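- # For example (illustrative), a 3-column table named my_table with index=False yields:
- #   INSERT INTO "my_table" VALUES (?, ?, ?);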
1553
-
1554
- rowcount = 0
1555
- # Iterate rows of DataFrame over new re-ordered columns
1556
- for row_index, row in enumerate(df[col_names].itertuples(index=True)):
1557
- ins_dict = ()
1558
- for col_index, x in enumerate(col_names):
1559
- ins_dict = ins_dict + (row[col_index+1],)
1560
-
1561
- if index is True:
1562
- ins_dict = ins_dict + row[0] if is_multi_index else ins_dict + (row[0],)
1563
-
1564
- insert_list.append(ins_dict)
1565
- rowcount = rowcount + 1
1566
-
1567
- # dbapi_batchsize corresponds to the max batch size for the DBAPI driver.
1568
- # Insert the rows once the batch-size reaches the max allowed.
1569
- if rowcount == chunksize:
1570
- # Batch insertion (using DBAPI's executemany) used here to insert the list of row tuples
1571
- cur = execute_sql(ins, insert_list)
1572
- if cur is not None:
1573
- cur.close()
1574
- rowcount = 0
1575
- insert_list.clear()
1576
-
1577
- # Insert any remaining rows.
1578
- if rowcount > 0:
1579
- cur = execute_sql(ins, insert_list)
1580
- if cur is not None:
1581
- cur.close()
1582
-
1583
- except Exception:
1584
- raise
1585
-
1586
- def _get_pd_df_column_names(df):
1587
- """
1588
- Internal function to return the names of columns in a Pandas DataFrame.
1589
-
1590
- PARAMETERS:
1591
- df:
1592
- The Pandas DataFrame to fetch the column names for.
1593
-
1594
- RETURNS:
1595
- A list of Strings
1596
-
1597
- RAISES:
1598
- None
1599
-
1600
- EXAMPLES:
1601
- _get_pd_df_column_names(df = my_df)
1602
- """
1603
- return df.columns.tolist()
1604
-
1605
- def _get_sqlalchemy_mapping(key):
1606
- """
1607
- This is an internal function used to return a SQLAlchemy Type Mapping
1608
- for a given Pandas DataFrame column Type.
1609
- Used for Table Object creation internally based on DF column info.
1610
-
1611
- For an unknown key, VARCHAR is returned.
1612
-
1613
- PARAMETERS:
1614
- key : String representing Pandas type ('int64', 'object' etc.)
1615
-
1616
- RETURNS:
1617
- SQLAlchemy Type (Integer, String, Float, DateTime etc.)
1618
-
1619
- RAISES:
1620
- N/A
1621
-
1622
- EXAMPLES:
1623
- _get_sqlalchemy_mapping(key = 'int64')
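- _get_sqlalchemy_mapping(key = 'unmapped_dtype') # Unknown key (illustrative); falls back to VARCHAR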
1624
- """
1625
- teradata_types_map = _get_all_sqlalchemy_mappings()
1626
-
1627
- if key in teradata_types_map.keys():
1628
- return teradata_types_map.get(key)
1629
- else:
1630
- return VARCHAR(configure.default_varchar_size,charset='UNICODE')
1631
-
1632
-
1633
- def _get_all_sqlalchemy_mappings():
1634
- """
1635
- This is an internal function used to return a dictionary of all SQLAlchemy Type Mappings.
1636
- It contains mappings from pandas data types to SQLAlchemy types.
1637
-
1638
- PARAMETERS:
1639
-
1640
- RETURNS:
1641
- dictionary { pandas_type : SQLAlchemy Type}
1642
-
1643
- RAISES:
1644
- N/A
1645
-
1646
- EXAMPLES:
1647
- _get_all_sqlalchemy_mappings()
1648
- """
1649
- teradata_types_map = {'int32':INTEGER, 'int64':BIGINT,
1650
- 'object':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1651
- 'O':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1652
- 'float64':FLOAT, 'float32':FLOAT, 'bool':BYTEINT,
1653
- 'datetime64':TIMESTAMP, 'datetime64[ns]':TIMESTAMP,
1654
- 'datetime64[ns, UTC]':TIMESTAMP(timezone=True),
1655
- 'timedelta64[ns]':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1656
- 'timedelta[ns]':VARCHAR(configure.default_varchar_size,charset='UNICODE')}
1657
-
1658
- return teradata_types_map
1659
-
1660
-
1661
- def _validate_timezero_date(timezero_date):
1662
- """
1663
- Internal function to validate timezero_date specified when creating a
1664
- Primary Time Index (PTI) table.
1665
-
1666
- PARAMETERS:
1667
- timezero_date:
1668
- The timezero_date passed to primary_time_index().
1669
-
1670
- RETURNS:
1671
- True if the value is valid.
1672
-
1673
- RAISES:
1674
- TeradataMlException when the value is invalid.
1675
-
1676
- EXAMPLE:
1677
- _validate_timezero_date("DATE '2011-01-01'")
1678
- _validate_timezero_date('2011-01-01') # Invalid
1679
- """
1680
- # Return True if it is not specified or is None since it is optional
1681
- if timezero_date is None:
1682
- return True
1683
-
1684
- pattern = re.compile(PTITableConstants.PATTERN_TIMEZERO_DATE.value)
1685
- match = pattern.match(timezero_date)
1686
-
1687
- err_msg = Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(timezero_date,
1688
- 'timezero_date',
1689
- "str of format DATE 'YYYY-MM-DD'")
1690
-
1691
- try:
1692
- datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
1693
- except (ValueError, AttributeError):
1694
- raise TeradataMlException(err_msg,
1695
- MessageCodes.INVALID_ARG_VALUE)
1696
-
1697
- # Looks like the value is valid
1698
- return True
1
+ #!/usr/bin/python
2
+ # ##################################################################
3
+ #
4
+ # Copyright 2018 Teradata. All rights reserved.
5
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
6
+ #
7
+ # ##################################################################
8
+
9
+ import re
10
+ import datetime
11
+ import warnings
12
+ import pandas as pd
13
+ import pandas.api.types as pt
14
+
15
+ from sqlalchemy import MetaData, Table, Column
16
+ from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
17
+ from teradatasqlalchemy import (INTEGER, BIGINT, BYTEINT, FLOAT)
18
+ from teradatasqlalchemy import (TIMESTAMP)
19
+ from teradatasqlalchemy import (VARCHAR)
20
+ from teradatasqlalchemy.dialect import TDCreateTablePost as post
21
+ from teradataml.common.aed_utils import AedUtils
22
+ from teradataml.context.context import *
23
+ from teradataml.dataframe import dataframe as tdmldf
24
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
25
+ from teradataml.dbutils.dbutils import _rename_table
26
+ from teradataml.common.utils import UtilFuncs
27
+ from teradataml.options.configure import configure
28
+ from teradataml.common.constants import CopyToConstants, PTITableConstants
29
+ from teradatasql import OperationalError
30
+ from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
31
+ from teradataml.utils.utils import execute_sql
32
+ from teradataml.utils.validators import _Validators
33
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband
34
+
35
+
36
+ @collect_queryband(queryband="CpToSql")
37
+ def copy_to_sql(df, table_name,
38
+ schema_name=None, if_exists='append',
39
+ index=False, index_label=None,
40
+ primary_index=None,
41
+ temporary=False, types = None,
42
+ primary_time_index_name = None,
43
+ timecode_column=None,
44
+ timebucket_duration = None,
45
+ timezero_date = None,
46
+ columns_list=None,
47
+ sequence_column=None,
48
+ seq_max=None,
49
+ set_table=False,
50
+ chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
51
+ match_column_order=True):
52
+ """
53
+ Writes records stored in a Pandas DataFrame or a teradataml DataFrame to Teradata Vantage.
54
+
55
+ PARAMETERS:
56
+
57
+ df:
58
+ Required Argument.
59
+ Specifies the Pandas or teradataml DataFrame object to be saved.
60
+ Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
61
+
62
+ table_name:
63
+ Required Argument.
64
+ Specifies the name of the table to be created in Vantage.
65
+ Types : String
66
+
67
+ schema_name:
68
+ Optional Argument.
69
+ Specifies the name of the SQL schema in Teradata Vantage to write to.
70
+ Types: String
71
+ Default: None (Uses default database schema).
72
+
73
+ Note: schema_name will be ignored when temporary=True.
74
+
75
+ if_exists:
76
+ Optional Argument.
77
+ Specifies the action to take when table already exists in Vantage.
78
+ Types: String
79
+ Possible values: {'fail', 'replace', 'append'}
80
+ - fail: If table exists, raise TeradataMlException.
81
+ - replace: If table exists, drop it, recreate it, and insert data.
82
+ - append: If table exists, insert data. Create if does not exist.
83
+ Default : append
84
+
85
+ Note: Replacing a table with the contents of a teradataml DataFrame based on
86
+ the same underlying table is not supported.
87
+
88
+ index:
89
+ Optional Argument.
90
+ Specifies whether to save Pandas DataFrame index as a column or not.
91
+ Types : Boolean (True or False)
92
+ Default : False
93
+
94
+ Note: Only use as True when attempting to save Pandas DataFrames (and not with teradataml DataFrames).
95
+
96
+ index_label:
97
+ Optional Argument.
98
+ Specifies the column label(s) for Pandas DataFrame index column(s).
99
+ Types : String or list of strings
100
+ Default : None
101
+
102
+ Note: If index_label is not specified (defaulted to None or is empty) and `index` is True, then
103
+ the 'names' property of the DataFrames index is used as the label(s),
104
+ and if that too is None or empty, then:
105
+ 1) a default label 'index_label' or 'level_0' (when 'index_label' is already taken) is used
106
+ when index is standard.
107
+ 2) default labels 'level_0', 'level_1', etc. are used when index is multi-level index.
108
+
109
+ Only use as True when attempting to save Pandas DataFrames (and not on teradataml DataFrames).
110
+
111
+ primary_index:
112
+ Optional Argument.
113
+ Specifies which column(s) to use as primary index while creating Teradata table(s) in Vantage.
114
+ When None, No Primary Index Teradata tables are created.
115
+ Types : String or list of strings
116
+ Default : None
117
+ Example:
118
+ primary_index = 'my_primary_index'
119
+ primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
120
+
121
+ temporary:
122
+ Optional Argument.
123
+ Specifies whether to create Vantage tables as permanent or volatile.
124
+ Types : Boolean (True or False)
125
+ Default : False
126
+
127
+ Note: When True:
128
+ 1. Volatile tables are created, and
129
+ 2. schema_name is ignored.
130
+ When False, permanent tables are created.
131
+
132
+ types:
133
+ Optional Argument.
134
+ Specifies required data-types for requested columns to be saved in Vantage.
135
+ Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
136
+ Default: None
137
+
138
+ Note:
139
+ 1. This argument accepts a dictionary of columns names and their required teradatasqlalchemy types
140
+ as key-value pairs, allowing to specify a subset of the columns of a specific type.
141
+ i) When the input is a Pandas DataFrame:
142
+ - When only a subset of all columns are provided, the column types for the rest are assigned
143
+ appropriately.
144
+ - When types argument is not provided, the column types are assigned
145
+ as listed in the following table:
146
+ +---------------------------+-----------------------------------------+
147
+ | Pandas/Numpy Type | teradatasqlalchemy Type |
148
+ +---------------------------+-----------------------------------------+
149
+ | int32 | INTEGER |
150
+ +---------------------------+-----------------------------------------+
151
+ | int64 | BIGINT |
152
+ +---------------------------+-----------------------------------------+
153
+ | bool | BYTEINT |
154
+ +---------------------------+-----------------------------------------+
155
+ | float32/float64 | FLOAT |
156
+ +---------------------------+-----------------------------------------+
157
+ | datetime64/datetime64[ns] | TIMESTAMP |
158
+ +---------------------------+-----------------------------------------+
159
+ | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True) |
160
+ +---------------------------+-----------------------------------------+
161
+ | Any other data type | VARCHAR(configure.default_varchar_size) |
162
+ +---------------------------+-----------------------------------------+
163
+ ii) When the input is a teradataml DataFrame:
164
+ - When only a subset of all columns are provided, the column types for the rest are retained.
165
+ - When types argument is not provided, the column types are retained.
166
+ 2. This argument does not have any effect when the table specified using table_name and schema_name
167
+ exists and if_exists = 'append'.
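+ Example:
+ types = {'emp_name': VARCHAR, 'emp_id': BIGINT}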
168
+
169
+ primary_time_index_name:
170
+ Optional Argument.
171
+ Specifies a name for the Primary Time Index (PTI) when the table
172
+ to be created must be a PTI table.
173
+ Type: String
174
+
175
+ Note: This argument is not required or used when the table to be created
176
+ is not a PTI table. It will be ignored if specified without the timecode_column.
177
+
178
+ timecode_column:
179
+ Optional Argument.
180
+ Required when the DataFrame must be saved as a PTI table.
181
+ Specifies the column in the DataFrame that reflects the form
182
+ of the timestamp data in the time series.
183
+ This column will be the TD_TIMECODE column in the table created.
184
+ It should be of SQL type TIMESTAMP(n), TIMESTAMP(n) WITH TIMEZONE, or DATE,
185
+ corresponding to Python types datetime.datetime or datetime.date, or Pandas dtype datetime64[ns].
186
+ Type: String
187
+
188
+ Note: When you specify this parameter, an attempt to create a PTI table
189
+ will be made. This argument is not required when the table to be created
190
+ is not a PTI table. If this argument is specified, primary_index will be ignored.
191
+
192
+ timezero_date:
193
+ Optional Argument.
194
+ Used when the DataFrame must be saved as a PTI table.
195
+ Specifies the earliest time series data that the PTI table will accept;
196
+ a date that precedes the earliest date in the time series data.
197
+ Value specified must be of the following format: DATE 'YYYY-MM-DD'
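+ For example: timezero_date = "DATE '2011-01-01'".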
198
+ Default Value: DATE '1970-01-01'.
199
+ Type: String
200
+
201
+ Note: This argument is not required or used when the table to be created
202
+ is not a PTI table. It will be ignored if specified without the timecode_column.
203
+
204
+ timebucket_duration:
205
+ Optional Argument.
206
+ Required if columns_list is not specified or is None.
207
+ Used when the DataFrame must be saved as a PTI table.
208
+ Specifies a duration that serves to break up the time continuum in
209
+ the time series data into discrete groups or buckets.
210
+ Specified using the formal form time_unit(n), where n is a positive
211
+ integer, and time_unit can be any of the following:
212
+ CAL_YEARS, CAL_MONTHS, CAL_DAYS, WEEKS, DAYS, HOURS, MINUTES,
213
+ SECONDS, MILLISECONDS, or MICROSECONDS.
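+ For example, timebucket_duration = 'HOURS(2)' buckets the readings into two-hour groups.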
214
+ Type: String
215
+
216
+ Note: This argument is not required or used when the table to be created
217
+ is not a PTI table. It will be ignored if specified without the timecode_column.
218
+
219
+ columns_list:
220
+ Optional Argument.
221
+ Used when the DataFrame must be saved as a PTI table.
222
+ Required if timebucket_duration is not specified.
223
+ A list of one or more PTI table column names.
224
+ Type: String or list of Strings
225
+
226
+ Note: This argument is not required or used when the table to be created
227
+ is not a PTI table. It will be ignored if specified without the timecode_column.
228
+
229
+ sequence_column:
230
+ Optional Argument.
231
+ Used when the DataFrame must be saved as a PTI table.
232
+ Specifies the column of type Integer containing the unique identifier for
233
+ time series data readings when they are not unique in time.
234
+ * When specified, implies SEQUENCED, meaning more than one reading from the same
235
+ sensor may have the same timestamp.
236
+ This column will be the TD_SEQNO column in the table created.
237
+ * When not specified, implies NONSEQUENCED, meaning there is only one sensor reading
238
+ per timestamp.
239
+ This is the default.
240
+ Type: str
241
+
242
+ Note: This argument is not required or used when the table to be created
243
+ is not a PTI table. It will be ignored if specified without the timecode_column.
244
+
245
+ seq_max:
246
+ Optional Argument.
247
+ Used when the DataFrame must be saved as a PTI table.
248
+ Specifies the maximum number of sensor data rows that can have the
249
+ same timestamp. Can be used when sequence_column is specified.
250
+ Accepted range: 1 - 2147483647.
251
+ Default Value: 20000.
252
+ Type: int
253
+
254
+ Note: This argument is not required or used when the table to be created
255
+ is not a PTI table. It will be ignored if specified without the timecode_column.
256
+
257
+ set_table:
258
+ Optional Argument.
259
+ Specifies a flag to determine whether to create a SET or a MULTISET table.
260
+ When True, a SET table is created.
261
+ When False, a MULTISET table is created.
262
+ Default Value: False
263
+ Type: boolean
264
+
265
+ Note: 1. Specifying set_table=True also requires specifying primary_index or timecode_column.
266
+ 2. Creating SET table (set_table=True) may result in
267
+ a. an error if the source is a Pandas DataFrame having duplicate rows.
268
+ b. loss of duplicate rows if the source is a teradataml DataFrame.
269
+ 3. This argument has no effect if the table already exists and if_exists='append'.
270
+
271
+ chunksize:
272
+ Optional Argument.
273
+ Specifies the number of rows to be loaded in a batch.
274
+ Note:
275
+ This argument is used only when argument "df" is a pandas DataFrame.
276
+ Default Value: 16383
277
+ Types: int
278
+
279
+ match_column_order:
280
+ Optional Argument.
281
+ Specifies whether the order of the columns in the existing table matches the order of
282
+ the columns in the "df" or not. When set to False, the dataframe to be loaded can
283
+ have any order and number of columns.
284
+ Default Value: True
285
+ Types: bool
286
+
287
+ RETURNS:
288
+ None
289
+
290
+ RAISES:
291
+ TeradataMlException
292
+
293
+ EXAMPLES:
294
+ 1. Saving a Pandas DataFrame:
295
+
296
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
297
+ >>> from teradatasqlalchemy.types import *
298
+
299
+ >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
300
+ 'emp_sage': [100, 200, 300, 400],
301
+ 'emp_id': [133, 144, 155, 177],
302
+ 'marks': [99.99, 97.32, 94.67, 91.00]
303
+ }
304
+
305
+ >>> pandas_df = pd.DataFrame(df)
306
+
307
+ a) Save a Pandas DataFrame using a dataframe & table name only:
308
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table')
309
+
310
+ b) Saving as a SET table
311
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_set_table', index=True,
312
+ primary_index='index_label', set_table=True)
313
+
314
+ c) Save a Pandas DataFrame by specifying additional parameters:
315
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table_2', schema_name = 'alice',
316
+ index = True, index_label = 'my_index_label', temporary = False,
317
+ primary_index = ['emp_id'], if_exists = 'append',
318
+ types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
319
+ 'emp_id': BIGINT, 'marks': DECIMAL})
320
+
321
+ d) Saving with additional parameters as a SET table
322
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
323
+ index = True, index_label = 'my_index_label', temporary = False,
324
+ primary_index = ['emp_id'], if_exists = 'append',
325
+ types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
326
+ 'emp_id': BIGINT, 'marks': DECIMAL},
327
+ set_table=True)
328
+
329
+ e) Saving levels in index of type MultiIndex
330
+ >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
331
+ >>> copy_to_sql(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
332
+ index = True, index_label = ['index1', 'index2'], temporary = False,
333
+ primary_index = ['index1'], if_exists = 'replace')
334
+
335
+ 2. Saving a teradataml DataFrame:
336
+
337
+ >>> from teradataml.dataframe.dataframe import DataFrame
338
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
339
+ >>> from teradatasqlalchemy.types import *
340
+ >>> from teradataml.data.load_example_data import load_example_data
341
+
342
+ >>> # Load the data to run the example.
343
+ >>> load_example_data("glm", "admissions_train")
344
+
345
+ >>> # Create teradataml DataFrame(s)
346
+ >>> df = DataFrame('admissions_train')
347
+ >>> df2 = df.select(['gpa', 'masters'])
348
+
349
+ a) Save a teradataml DataFrame by using only a table name:
350
+ >>> df2.to_sql('my_tdml_table')
351
+
352
+ b) Save a teradataml DataFrame by using additional parameters:
353
+ >>> df2.to_sql(table_name = 'my_tdml_table', if_exists='append',
354
+ primary_index = ['gpa'], temporary=False, schema_name='alice')
355
+
356
+ c) Alternatively, save a teradataml DataFrame by using copy_to_sql:
357
+ >>> copy_to_sql(df2, 'my_tdml_table_2')
358
+
359
+ d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
360
+ >>> copy_to_sql(df = df2, table_name = 'my_tdml_table_3', schema_name = 'alice',
361
+ temporary = False, primary_index = None, if_exists = 'append',
362
+ types = {'masters': VARCHAR, 'gpa':INTEGER})
363
+
364
+ e) Saving as a SET table
365
+ >>> copy_to_sql(df = df2, table_name = 'my_tdml_set_table', schema_name = 'alice',
366
+ temporary = False, primary_index = ['gpa'], if_exists = 'append',
367
+ types = {'masters': VARCHAR, 'gpa':INTEGER}, set_table = True)
368
+
369
+ 3. Saving a teradataml DataFrame as a PTI table:
370
+
371
+ >>> from teradataml.dataframe.dataframe import DataFrame
372
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
373
+ >>> from teradataml.data.load_example_data import load_example_data
374
+
375
+ >>> load_example_data("sessionize", "sessionize_table")
376
+ >>> df3 = DataFrame('sessionize_table')
377
+
378
+ a) Using copy_to_sql
379
+ >>> copy_to_sql(df3, "test_copyto_pti",
380
+ timecode_column='clicktime',
381
+ columns_list='event')
382
+
383
+ b) Alternatively, using DataFrame.to_sql
384
+ >>> df3.to_sql(table_name = "test_copyto_pti_1",
385
+ timecode_column='clicktime',
386
+ columns_list='event')
387
+
388
+ c) Saving as a SET table
389
+ >>> copy_to_sql(df3, "test_copyto_pti_2",
390
+ timecode_column='clicktime',
391
+ columns_list='event',
392
+ set_table=True)
393
+
394
+ """
395
+ # Deriving global connection using get_connection().
396
+ con = get_connection()
397
+
398
+ try:
399
+ if con is None:
400
+ raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE), MessageCodes.CONNECTION_FAILURE)
401
+
402
+ # Check if the table to be created must be a Primary Time Index (PTI) table.
403
+ # If a user specifies the timecode_column parameter, an attempt to create
404
+ # a PTI table will be made.
405
+ is_pti = False
406
+ if timecode_column is not None:
407
+ is_pti = True
408
+ if primary_index is not None:
409
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
410
+ 'primary_index',
411
+ 'timecode_column',
412
+ 'specified'), stacklevel=2)
413
+ else:
414
+ ignored = []
415
+ if timezero_date is not None: ignored.append('timezero_date')
416
+ if timebucket_duration is not None: ignored.append('timebucket_duration')
417
+ if sequence_column is not None: ignored.append('sequence_column')
418
+ if seq_max is not None: ignored.append('seq_max')
419
+ if columns_list is not None and (
420
+ not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
421
+ if primary_time_index_name is not None: ignored.append('primary_time_index_name')
422
+ if len(ignored) > 0:
423
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
424
+ ignored,
425
+ 'timecode_column',
426
+ 'missing'), stacklevel=2)
427
+
428
+ # Unset schema_name when temporary is True since volatile tables are always in the user database
429
+ if temporary is True:
430
+ if schema_name is not None:
431
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
432
+ 'schema_name',
433
+ 'temporary=True',
434
+ 'specified'), stacklevel=2)
435
+ schema_name = None
436
+
437
+ # Validate DataFrame & related flags; Proceed only when True
438
+ from teradataml.dataframe.data_transfer import _DataTransferUtils
439
+ dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name,
440
+ if_exists=if_exists, index=index, index_label=index_label,
441
+ primary_index=primary_index, temporary=temporary,
442
+ types=types, primary_time_index_name=primary_time_index_name,
443
+ timecode_column=timecode_column,
444
+ timebucket_duration=timebucket_duration,
445
+ timezero_date=timezero_date, columns_list=columns_list,
446
+ sequence_column=sequence_column, seq_max=seq_max,
447
+ set_table=set_table, api_name='copy_to',
448
+ chunksize=chunksize, match_column_order=match_column_order)
449
+
450
+ dt_obj._validate()
451
+
452
+ # If the table created must be a PTI table, then validate additional parameters
453
+ # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
454
+ # will be ignored - for example, primary_index
455
+ if is_pti:
456
+ _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
457
+ timezero_date, primary_time_index_name, columns_list,
458
+ sequence_column, seq_max, types, index, index_label)
459
+
460
+ # A table cannot be a SET table and have NO PRIMARY INDEX
461
+ if set_table and primary_index is None and timecode_column is None:
462
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_NO_PI),
463
+ MessageCodes.SET_TABLE_NO_PI)
464
+
465
+ # Check if destination table exists
466
+ table_exists = dt_obj._table_exists(con)
467
+
468
+ # Raise an exception when the table exists and if_exists = 'fail'
469
+ dt_obj._check_table_exists(is_table_exists=table_exists)
470
+
471
+ # Is the input DataFrame a Pandas DataFrame?
472
+ is_pandas_df = isinstance(df, pd.DataFrame)
473
+
474
+ # Let's also execute the node and set the table_name when df is teradataml DataFrame
475
+ if not is_pandas_df and df._table_name is None:
476
+ df._table_name = df_utils._execute_node_return_db_object_name(df._nodeid, df._metaexpr)
477
+
478
+ # Check if a table name conflict is present.
479
+ is_conflict = _check_table_name_conflict(df, table_name) if isinstance(df, tdmldf.DataFrame) and \
480
+ if_exists.lower() == 'replace' else False
481
+
482
+ # Create a temporary table name when a table name conflict is present.
483
+ if is_conflict:
484
+ # Store actual destination table name for later use.
485
+ dest_table_name = table_name
486
+ table_name = UtilFuncs._generate_temp_table_name(prefix=table_name,
487
+ table_type=TeradataConstants.TERADATA_TABLE,
488
+ quote=False)
489
+
490
+ # Let's create the SQLAlchemy table object to recreate the table
491
+ if not table_exists or if_exists.lower() == 'replace':
492
+ if not is_pti:
493
+ table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
494
+ types, None if not is_pandas_df else index,
495
+ None if not is_pandas_df else index_label)
496
+ else:
497
+ table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
498
+ primary_time_index_name, timecode_column, timezero_date,
499
+ timebucket_duration, sequence_column, seq_max,
500
+ columns_list, set_table, types,
501
+ None if not is_pandas_df else index,
502
+ None if not is_pandas_df else index_label)
503
+
504
+ if table is not None:
505
+ # If the table needs to be replaced and there is no table name conflict,
506
+ # let's drop the existing table first
507
+ if table_exists and not is_conflict:
508
+ tbl_name = dt_obj._get_fully_qualified_table_name()
509
+ UtilFuncs._drop_table(tbl_name)
510
+ try:
511
+ table.create(bind=get_context())
512
+ except sqlachemyOperationalError as err:
513
+ raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
514
+ '\n' + str(err),
515
+ MessageCodes.TABLE_OBJECT_CREATION_FAILED)
516
+ else:
517
+ raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
518
+ MessageCodes.TABLE_OBJECT_CREATION_FAILED)
519
+
520
+ # Check column compatibility for insertion when table exists and if_exists = 'append'
521
+ if table_exists and if_exists.lower() == 'append':
522
+ UtilFuncs._get_warnings('set_table', set_table, 'if_exists', 'append')
523
+
524
+ table = UtilFuncs._get_sqlalchemy_table(table_name,
525
+ schema_name=schema_name)
526
+
527
+ if table is not None:
528
+ # ELE-2284
529
+ # We are not considering types for 'append' mode as it is a simple insert and no casting is applied
530
+ if is_pandas_df:
531
+ cols = _extract_column_info(df, index=index, index_label=index_label)
532
+ else:
533
+ cols, _ = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
534
+ if match_column_order:
535
+ cols_compatible = _check_columns_insertion_compatible(table.c, cols, is_pandas_df,
536
+ is_pti, timecode_column, sequence_column)
537
+
538
+ if not cols_compatible:
539
+ raise TeradataMlException(Messages.get_message(MessageCodes.INSERTION_INCOMPATIBLE),
540
+ MessageCodes.INSERTION_INCOMPATIBLE)
541
+
542
+ # df is a Pandas DataFrame object
543
+ if isinstance(df, pd.DataFrame):
544
+ if not table_exists or if_exists.lower() == 'replace':
545
+ try:
546
+ # Support for saving Pandas index/Volatile is by manually inserting rows (batch) for now
547
+ if index or is_pti:
548
+ _insert_from_dataframe(df, con, schema_name, table_name, index,
549
+ chunksize, is_pti, timecode_column,
550
+ sequence_column, match_column_order)
551
+
552
+ # When index isn't saved & for non-PTI tables, to_sql insertion used (batch)
553
+ else:
554
+ # Empty queryband buffer before SQL call.
555
+ UtilFuncs._set_queryband()
556
+ df.to_sql(table_name, get_context(), if_exists='append', index=False, index_label=None,
557
+ chunksize=chunksize, schema=schema_name)
558
+
559
+ except sqlachemyOperationalError as err:
560
+ if "Duplicate row error" in str(err):
561
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_DUPICATE_ROW,
562
+ table_name),
563
+ MessageCodes.SET_TABLE_DUPICATE_ROW)
564
+ else:
565
+ raise
566
+
567
+ elif table_exists and if_exists.lower() == 'append':
568
+ _insert_from_dataframe(df, con, schema_name, table_name, index,
569
+ chunksize, is_pti, timecode_column,
570
+ sequence_column, match_column_order)
571
+
572
+
573
+ # df is a teradataml DataFrame object (to_sql wrapper used)
574
+ elif isinstance(df, tdmldf.DataFrame):
575
+ df_column_list = [col.name for col in df._metaexpr.c]
576
+
577
+ if is_pti:
578
+ # Reorder the column list to reposition the timecode and sequence columns
579
+ df_column_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
580
+
581
+ df_utils._insert_all_from_table(table_name, df._table_name, df_column_list, schema_name, temporary)
582
+
583
+ # When a table name conflict is present, delete the source table after creating the temporary table,
585
+ # then rename the temporary table to the destination table name.
585
+ if is_conflict and if_exists.lower() == 'replace':
586
+ tbl_name = dt_obj._get_fully_qualified_table_name()
587
+ UtilFuncs._drop_table(tbl_name)
588
+ _rename_table(table_name, dest_table_name)
589
+
590
+
591
+ except (TeradataMlException, ValueError, TypeError):
592
+ raise
593
+ except Exception as err:
594
+ raise TeradataMlException(Messages.get_message(MessageCodes.COPY_TO_SQL_FAIL) + str(err),
595
+ MessageCodes.COPY_TO_SQL_FAIL) from err
596
+
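+ # Illustrative sketch (not part of the module) of the replace path above when the
+ # destination name also feeds the source DataFrame: the data is first materialized
+ # under a temporary name, the original table is dropped, and the staging table is
+ # renamed. The helper names below are hypothetical stand-ins for the internal utilities.
+ #
+ #     staging = dest_table_name + "_stage"      # assumed staging-name scheme
+ #     materialize(df, staging)                  # CREATE TABLE staging AS (SELECT ...)
+ #     drop_table(dest_table_name)               # remove the conflicting source table
+ #     rename_table(staging, dest_table_name)    # RENAME TABLE staging TO dest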
597
+
598
+ def _check_table_name_conflict(df, table_name):
599
+ """
600
+ Check whether the destination "table_name" matches any of the teradataml DataFrame's parent nodes.
601
+ This function traverses the DAG from the child node to the root node and checks for a table name conflict.
602
+
603
+ PARAMETERS:
604
+ df:
605
+ Required Argument.
606
+ Specifies the teradataml DataFrame object to be checked.
607
+ Types: teradataml.dataframe.dataframe.DataFrame
608
+
609
+ table_name:
610
+ Required Argument.
611
+ Specifies the name of the table to be created in Vantage.
612
+ Types : String
613
+
614
+ RETURNS:
615
+ A boolean value representing the presence of conflict.
616
+
617
+ RAISES:
618
+ None
619
+
620
+ EXAMPLES:
621
+ >>> df = DataFrame("sales")
622
+ >>> table_name = "destination_table"
623
+ >>> _check_table_name_conflict(df, table_name)
624
+ """
625
+ aed_obj = AedUtils()
626
+ # Check if the parent node count is greater than 0.
627
+ if aed_obj._aed_get_parent_node_count(df._nodeid) > 0:
628
+ # Check whether "table_name" matches any of the parent nodes' table names.
629
+ # Get current table node id.
630
+ node_id = df._nodeid
631
+ while node_id:
632
+
633
+ # Get the parent node id using current table node id.
634
+ parent_node_id = aed_obj._aed_get_parent_nodeids(node_id)
635
+
636
+ if parent_node_id:
637
+ # Check "table_name" matches with the parent "table_name".
638
+ # If table name matches, then return 'True'.
639
+ # Otherwise, Traverse the graph from current node to the top most root node.
640
+ if table_name in aed_obj._aed_get_source_tablename(parent_node_id[0]):
641
+ return True
642
+ else:
643
+ node_id = parent_node_id[0]
644
+ else:
645
+ # When parent_node_id is empty, return False.
646
+ return False
647
+ return False
648
+
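+ # A minimal, self-contained sketch of the traversal above, assuming a simplified
+ # node model where each node records one parent and a source table name (the real
+ # implementation walks the AED DAG via AedUtils):
+ #
+ #     def has_name_conflict(node, table_name):
+ #         node = node.parent                       # hypothetical attribute
+ #         while node is not None:
+ #             if table_name in node.source_table:  # hypothetical attribute
+ #                 return True
+ #             node = node.parent
+ #         return False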
649
+
650
+ def _get_sqlalchemy_table_from_tdmldf(df, meta):
651
+ """
652
+ This is an internal function used to generate an SQLAlchemy Table
653
+ object for the underlying table/view of a DataFrame.
654
+
655
+ PARAMETERS:
656
+ df:
657
+ The teradataml DataFrame to generate the SQLAlchemy.Table object for.
658
+
659
+ meta:
660
+ The SQLAlchemy.Metadata object.
661
+
662
+ RETURNS:
663
+ SQLAlchemy.Table
664
+
665
+ RAISES:
666
+ None
667
+
668
+ EXAMPLES:
669
+ >>> con = get_connection()
670
+ >>> df = DataFrame('admissions_train')
671
+ >>> meta = sqlalchemy.MetaData()
672
+ >>> table = _get_sqlalchemy_table_from_tdmldf(df, meta)
673
+
674
+ """
675
+ con = get_connection()
676
+ db_schema = UtilFuncs._extract_db_name(df._table_name)
677
+ db_table_name = UtilFuncs._extract_table_name(df._table_name)
678
+
679
+ return Table(db_table_name, meta, schema=db_schema, autoload_with=get_context())
680
+
681
+
682
+ def _get_index_labels(df, index_label):
683
+ """
684
+ Internal function to construct a list of labels for the indices to be saved from the Pandas DataFrame,
685
+ based on user input and information from the DataFrame.
686
+
687
+ PARAMETERS:
688
+ df:
689
+ The Pandas input DataFrame.
690
+
691
+ index_label:
692
+ The user provided label(s) for the indices.
693
+
694
+ RAISES:
695
+ None
696
+
697
+ RETURNS:
698
+ A tuple of the list of String labels for the indices to add as columns, and the corresponding index dtypes.
699
+
700
+ EXAMPLES:
701
+ _get_index_labels(df, index_label)
702
+ """
703
+ default_index_label = 'index_label'
704
+ default_level_prefix = 'level_'
705
+ level_cnt = 0
706
+
707
+ is_multi_index = isinstance(df.index, pd.MultiIndex)
708
+ ind_types = [level.dtype for level in df.index.levels] if is_multi_index else [df.index.dtype]
709
+
710
+ ind_names = []
711
+ if index_label:
712
+ ind_names = [index_label] if isinstance(index_label, str) else index_label
713
+ else:
714
+ for name in df.index.names:
715
+ if name not in ('', None):
716
+ ind_names.append(name)
717
+ else:
718
+ if is_multi_index:
719
+ ind_names.append(default_level_prefix + str(level_cnt))
720
+ level_cnt = level_cnt + 1
721
+ else:
722
+ df_columns = _get_pd_df_column_names(df)
723
+ label = default_level_prefix + str(level_cnt) if default_index_label in df_columns else default_index_label
724
+ ind_names.append(label)
725
+
726
+ return ind_names, ind_types
727
+
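+ # For example, assuming the behavior above, an unnamed two-level MultiIndex yields
+ # ['level_0', 'level_1'], and an unnamed single index yields ['index_label'] (or
+ # 'level_0' if a column named 'index_label' already exists):
+ #
+ #     import pandas as pd
+ #     pdf = pd.DataFrame({'a': [1, 2]},
+ #                        index=pd.MultiIndex.from_tuples([(0, 'x'), (1, 'y')]))
+ #     names, dtypes = _get_index_labels(pdf, index_label=None)
+ #     # names == ['level_0', 'level_1']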
728
+
729
+ def _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
730
+ timezero_date, primary_time_index_name, columns_list,
731
+ sequence_column, seq_max, types, index, index_label):
732
+ """
733
+ This is an internal function used to validate the PTI part of a copy request.
734
+ The DataFrame, connection, and related parameters are checked.
735
+ Saving to Vantage proceeds only when validation succeeds.
736
+
737
+ PARAMETERS:
738
+ df:
739
+ The DataFrame (Pandas or teradataml) object to be saved.
740
+
741
+ timecode_column:
742
+ The column in the DataFrame that reflects the form of the timestamp
743
+ data in the time series.
744
+ Type: String
745
+
746
+ timebucket_duration:
747
+ A duration that serves to break up the time continuum in
748
+ the time series data into discrete groups or buckets.
749
+ Type: String
750
+
751
+ timezero_date:
752
+ Specifies the earliest time series data that the PTI table will accept.
753
+ Type: String
754
+
755
+ primary_time_index_name:
756
+ A name for the Primary Time Index (PTI).
757
+ Type: String
758
+
759
+ columns_list:
760
+ A list of one or more PTI table column names.
761
+ Type: String or list of Strings
762
+
763
+ sequence_column:
764
+ Specifies a column of type Integer with sequences implying that the
765
+ time series data readings are not unique.
766
+ If not specified, the time series data are assumed to be unique in time.
767
+ Type: String
768
+
769
+ seq_max:
770
+ Specifies the maximum number of sensor data rows that can have the
771
+ same timestamp. Can be used when "sequence_column" is specified.
772
+ Accepted range: 1 - 2147483647.
773
+ Type: int
774
+
775
+ types:
776
+ Dictionary specifying column-name to teradatasqlalchemy type-mapping.
777
+
778
+ index:
779
+ Flag specifying whether to write Pandas DataFrame index as a column or not.
780
+ Type: bool
781
+
782
+ index_label:
783
+ Column label for index column(s).
784
+ Type: String
785
+
786
+ RETURNS:
787
+ True, when all parameters are valid.
788
+
789
+ RAISES:
790
+ TeradataMlException, when parameter validation fails.
791
+
792
+ EXAMPLES:
793
+ _validate_pti_copy_parameters(df = my_df, timecode_column = 'ts', timebucket_duration = 'HOURS(2)')
794
+ """
795
+ if isinstance(df, pd.DataFrame):
796
+ df_columns = _get_pd_df_column_names(df)
797
+ else:
798
+ df_columns = [col.name for col in df._metaexpr.c]
799
+
800
+ awu = AnalyticsWrapperUtils()
801
+ awu_matrix = []
802
+
803
+ # The arguments added to awu_matrix are:
804
+ # arg_name, arg, is_optional, acceptable types
805
+ # The value for is_optional is set to False when the argument
806
+ # a) is a required argument
807
+ # b) is not allowed to be None, even if it is optional
808
+ awu_matrix.append(['timecode_column', timecode_column, False, (str)])
809
+ awu_matrix.append(['columns_list', columns_list, True, (str, list)])
810
+ awu_matrix.append(['timezero_date', timezero_date, True, (str)])
811
+ awu_matrix.append(['timebucket_duration', timebucket_duration, True, (str)])
812
+ awu_matrix.append(['primary_time_index_name', primary_time_index_name, True, (str)])
813
+ awu_matrix.append(['sequence_column', sequence_column, True, (str)])
814
+ awu_matrix.append(['seq_max', seq_max, True, (int)])
815
+
816
+ # Validate types
817
+ awu._validate_argument_types(awu_matrix)
818
+
819
+ # Validate that arguments are not empty
820
+ awu._validate_input_columns_not_empty(timecode_column, 'timecode_column')
821
+ awu._validate_input_columns_not_empty(columns_list, 'columns_list')
822
+ awu._validate_input_columns_not_empty(timezero_date, 'timezero_date')
823
+ awu._validate_input_columns_not_empty(timebucket_duration, 'timebucket_duration')
824
+ awu._validate_input_columns_not_empty(sequence_column, 'sequence_column')
825
+
826
+ # Validate all the required arguments, and optional arguments when not None
827
+ # First the timecode_column
828
+ _validate_column_in_list_of_columns('df', df_columns, timecode_column, 'timecode_column')
829
+ # Check the type of timecode_column
830
+ _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES.value,
831
+ types, index, index_label)
832
+
833
+ # timezero date
834
+ _validate_timezero_date(timezero_date)
835
+
836
+ # timebucket duration
837
+ _Validators._validate_timebucket_duration(timebucket_duration)
838
+
839
+ # Validate sequence_column
840
+ if sequence_column is not None:
841
+ _validate_column_in_list_of_columns('df', df_columns, sequence_column, 'sequence_column')
842
+ # Check the type of sequence_column
843
+ _validate_column_type(df, sequence_column, 'sequence_column',
844
+ PTITableConstants.VALID_SEQUENCE_COL_DATATYPES.value, types, index, index_label)
845
+
846
+ # Validate seq_max
847
+ if seq_max is not None and (seq_max < 1 or seq_max > 2147483647):
848
+ raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(seq_max, 'seq_max', '1 <= integer <= 2147483647'),
849
+ MessageCodes.INVALID_ARG_VALUE)
850
+
851
+ # Validate cols_list
852
+ _validate_columns_list('df', df_columns, columns_list)
853
+ if isinstance(columns_list, str):
854
+ columns_list = [columns_list]
855
+
856
+ # At least one of timebucket_duration and columns_list must be specified
857
+ if timebucket_duration is None and (columns_list is None or len(columns_list) == 0):
858
+ raise TeradataMlException(
859
+ Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, 'timebucket_duration', 'columns_list'),
860
+ MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
861
+
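+ # For instance (sketch, assuming a pandas DataFrame "pdf" with a timestamp column
+ # 'ts', an integer column 'seq', and a column 'region'), either of these satisfies
+ # the rule that at least one of timebucket_duration and columns_list must be given:
+ #
+ #     _validate_pti_copy_parameters(pdf, timecode_column='ts',
+ #                                   timebucket_duration='HOURS(2)', timezero_date=None,
+ #                                   primary_time_index_name=None, columns_list=None,
+ #                                   sequence_column='seq', seq_max=100, types=None,
+ #                                   index=False, index_label=None)
+ #     _validate_pti_copy_parameters(pdf, timecode_column='ts', timebucket_duration=None,
+ #                                   timezero_date=None, primary_time_index_name=None,
+ #                                   columns_list=['region'], sequence_column=None,
+ #                                   seq_max=None, types=None, index=False, index_label=None)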
862
+
863
+ def _validate_columns_list(df, df_columns, columns_list):
864
+ """
865
+ Internal function to validate columns list specified when creating a
866
+ Primary Time Index (PTI) table.
867
+
868
+ PARAMETERS:
869
+ df:
870
+ Name of the DataFrame to which the column being validated
871
+ does or should belong.
872
+
873
+ df_columns:
874
+ List of columns in the DataFrame.
875
+
876
+ columns_list:
877
+ The column or list of columns.
878
+ Type: String or list of Strings
879
+
880
+ RETURNS:
881
+ True if the column or list of columns is valid.
882
+
883
+ RAISES:
884
+ Raise TeradataMlException on validation failure.
885
+ """
886
+ if columns_list is None:
887
+ return True
888
+
889
+ # Normalize a single column name to a list before validating.
890
+ if isinstance(columns_list, str):
891
+ columns_list = [columns_list]
892
+
893
+ for col in columns_list:
894
+ _validate_column_in_list_of_columns(df, df_columns, col, 'columns_list')
895
+
896
+ return True
897
+
898
+
899
+ def _validate_column_in_list_of_columns(df, df_columns, col, col_arg):
900
+ """
901
+ Internal function to validate the arguments used to specify
902
+ a column name in DataFrame.
903
+
904
+ PARAMETERS:
905
+ df:
906
+ Name of the DataFrame to which the column being validated
907
+ does or should belong.
908
+
909
+ df_columns:
910
+ List of columns in the DataFrame.
911
+
912
+ col:
913
+ Column to be validated.
914
+
915
+ col_arg:
916
+ Name of argument used to specify the column name.
917
+
918
+ RETURNS:
919
+ True, if the column name is valid.
920
+
921
+ RAISES:
922
+ TeradataMlException, if the column name is invalid.
923
+ """
924
+ if col not in df_columns:
925
+ raise TeradataMlException(
926
+ Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(col,
927
+ col_arg,
928
+ df,
929
+ 'DataFrame'),
930
+ MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND)
931
+
932
+ return True
933
+
934
+
935
+ def _validate_column_type(df, col, col_arg, expected_types, types = None, index = False, index_label = None):
936
+ """
937
+ Internal function to validate the type of an input DataFrame column against
938
+ a list of expected types.
939
+
940
+ PARAMETERS:
941
+ df:
942
+ Input DataFrame (Pandas or teradataml) which has the column to be tested
943
+ for type.
944
+
945
+ col:
946
+ The column in the input DataFrame to be tested for type.
947
+
948
+ col_arg:
949
+ The name of the argument used to pass the column name.
950
+
951
+ expected_types:
952
+ Specifies a list of teradatasqlalchemy datatypes that the column is
953
+ expected to be one of.
954
+
955
+ types:
956
+ Dictionary specifying column-name to teradatasqlalchemy type-mapping.
957
+
958
+ RETURNS:
959
+ True, when the column is of an expected type.
960
+
961
+ RAISES:
962
+ TeradataMlException, when the column is not one of the expected types.
963
+
964
+ EXAMPLES:
965
+ _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES, types)
966
+ """
967
+ # Check if the column is being mapped (via "types") to a valid type
968
+ if types is not None and col in types:
969
+ if not any(isinstance(types[col], expected_type) for expected_type in expected_types):
970
+ raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
971
+ format(col_arg, types[col], ' or '.join(expected_type.__visit_name__
972
+ for expected_type in expected_types)),
973
+ MessageCodes.INVALID_COLUMN_TYPE)
974
+ # Else we need to copy without any casting
975
+ elif isinstance(df, pd.DataFrame):
976
+ t = _get_sqlalchemy_mapping_types(str(df.dtypes[col]))
977
+ if t not in expected_types:
978
+ raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
979
+ format(col_arg, t, ' or '.join(expected_type.__visit_name__
980
+ for expected_type in expected_types)),
981
+ MessageCodes.INVALID_COLUMN_TYPE)
982
+ elif not any(isinstance(df[col].type, t) for t in expected_types):
983
+ raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
984
+ format(col_arg, df[col].type, ' or '.join(expected_type.__visit_name__
985
+ for expected_type in expected_types)),
986
+ MessageCodes.INVALID_COLUMN_TYPE)
987
+
988
+ return True
989
+
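+ # e.g. (sketch): for a pandas DataFrame whose 'ts' column has dtype datetime64[ns],
+ # the mapped type (TIMESTAMP) is checked for membership in expected_types, while a
+ # teradataml DataFrame column is checked with isinstance against its SQL type:
+ #
+ #     _validate_column_type(pdf, 'ts', 'timecode_column',
+ #                           PTITableConstants.VALID_TIMECODE_DATATYPES.value)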
990
+
991
+ def _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table, types, index=None,
992
+ index_label=None):
993
+ """
994
+ This is an internal function used to construct a SQLAlchemy Table Object.
995
+ This function checks appropriate flags and supports creation of Teradata
996
+ specific Table constructs such as Volatile/Primary Index tables.
997
+
998
+
999
+ PARAMETERS:
1000
+ df:
1001
+ The teradataml or Pandas DataFrame object to be saved.
1002
+
1003
+ table_name:
1004
+ Name of SQL table.
1005
+
1006
+ con:
1007
+ A SQLAlchemy connectable (engine/connection) object
1008
+
1009
+ primary_index:
1010
+ Creates Teradata Table(s) with Primary index column if specified.
1011
+
1012
+ temporary:
1013
+ Flag specifying whether SQL table to be created is Volatile or not.
1014
+
1015
+ schema_name:
1016
+ Specifies the name of the SQL schema in the database to write to.
1017
+
1018
+ set_table:
1019
+ A flag specifying whether to create a SET table or a MULTISET table.
1020
+ When True, an attempt to create a SET table is made.
1021
+ When False, an attempt to create a MULTISET table is made.
1022
+
1023
+ types:
1024
+ Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
1025
+
1026
+ index:
1027
+ Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
1028
+
1029
+ index_label:
1030
+ Column label(s) for index column(s).
1031
+
1032
+ RETURNS:
1033
+ SQLAlchemy Table
1034
+
1035
+ RAISES:
1036
+ N/A
1037
+
1038
+ EXAMPLES:
1039
+ _create_table_object(df = my_df, table_name = 'test_table', con = tdconnection, primary_index = None,
1040
+ temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
1041
+ _create_table_object(df = csv_filepath, table_name = 'test_table', con = tdconnection, primary_index = None,
1042
+ temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
1043
+ """
1044
+ # Dictionary to append special flags, can be extended to add Fallback, Journalling, Log etc.
1045
+ post_params = {}
1046
+ prefix = []
1047
+ pti = post(opts=post_params)
1048
+
1049
+ if temporary is True:
1050
+ pti = pti.on_commit(option='preserve')
1051
+ prefix.append('VOLATILE')
1052
+
1053
+ if not set_table:
1054
+ prefix.append('multiset')
1055
+ else:
1056
+ prefix.append('set')
1057
+
1058
+ meta = MetaData()
1059
+ meta.bind = con
1060
+
1061
+ if isinstance(df, pd.DataFrame):
1062
+ col_names, col_types = _extract_column_info(df, types, index, index_label)
1063
+ elif isinstance(df, str):
1064
+ col_names, col_types = _extract_column_info(df, types)
1065
+ else:
1066
+ col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
1067
+ if types is not None:
1068
+ # Use the user-provided type when specified; fall back to the default for columns not covered by a partial types mapping.
1069
+ col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
1070
+
1071
+ if primary_index is not None:
1072
+ if isinstance(primary_index, list):
1073
+ pti = pti.primary_index(unique=False, cols=primary_index)
1074
+ elif isinstance(primary_index, str):
1075
+ pti = pti.primary_index(unique=False, cols=[primary_index])
1076
+ else:
1077
+ pti = pti.no_primary_index()
1078
+
1079
+ # Create default Table construct with parameter dictionary
1080
+ table = Table(table_name, meta,
1081
+ *(Column(col_name, col_type)
1082
+ for col_name, col_type in
1083
+ zip(col_names, col_types)),
1084
+ teradatasql_post_create=pti,
1085
+ prefixes=prefix,
1086
+ schema=schema_name
1087
+ )
1088
+
1089
+ return table
1090
+
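+ # The construct above renders DDL along these lines (illustrative only; the exact
+ # text is produced by the teradatasqlalchemy dialect):
+ #
+ #     CREATE MULTISET VOLATILE TABLE "myschema"."test_table" (
+ #         "idx"  INTEGER,
+ #         "name" VARCHAR(1024) CHARACTER SET UNICODE
+ #     ) PRIMARY INDEX ("idx")
+ #     ON COMMIT PRESERVE ROWS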
1091
+
1092
+ def _create_pti_table_object(df, con, table_name, schema_name, temporary, primary_time_index_name,
1093
+ timecode_column, timezero_date, timebucket_duration,
1094
+ sequence_column, seq_max, columns_list, set_table, types, index=None, index_label=None):
1095
+ """
1096
+ This is an internal function used to construct a SQLAlchemy Table Object.
1097
+ This function checks appropriate flags and supports creation of Teradata
1098
+ specific Table constructs such as Volatile and Primary Time Index tables.
1099
+
1100
+ PARAMETERS:
1101
+ df:
1102
+ The teradataml or Pandas DataFrame object to be saved.
1103
+
1104
+ con:
1105
+ A SQLAlchemy connectable (engine/connection) object
1106
+
1107
+ table_name:
1108
+ Name of SQL table.
1109
+
1110
+ schema_name:
1111
+ Specifies the name of the SQL schema in the database to write to.
1112
+
1113
+ temporary:
1114
+ Flag specifying whether SQL table to be created is Volatile or not.
1115
+
1116
+ primary_time_index_name:
1117
+ A name for the Primary Time Index (PTI).
1118
+
1119
+ timecode_column:
1120
+ The column in the DataFrame that reflects the form of the timestamp
1121
+ data in the time series.
1122
+
1123
+ timezero_date:
1124
+ Specifies the earliest time series data that the PTI table will accept.
1125
+
1126
+ timebucket_duration:
1127
+ A duration that serves to break up the time continuum in
1128
+ the time series data into discrete groups or buckets.
1129
+
1130
+ sequence_column:
1131
+ Specifies a column with sequences implying that time series data
1132
+ readings are not unique. If not specified, the time series data are
1133
+ assumed to be unique.
1134
+
1135
+ seq_max:
1136
+ Specifies the maximum number of sensor data rows that can have the
1137
+ same timestamp. Can be used when "sequence_column" is specified.
1138
+
1139
+ columns_list:
1140
+ A list of one or more PTI table column names.
1141
+
1142
+ set_table:
1143
+ A flag specifying whether to create a SET table or a MULTISET table.
1144
+ When True, an attempt to create a SET table is made.
1145
+ When False, an attempt to create a MULTISET table is made.
1146
+
1147
+ types:
1148
+ Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
1149
+
1150
+ index:
1151
+ Flag specifying whether to write Pandas DataFrame index as a column or not.
1152
+
1153
+ index_label:
1154
+ Column label for index column(s).
1155
+
1156
+ RETURNS:
1157
+ SQLAlchemy Table
1158
+
1159
+ RAISES:
1160
+ N/A
1161
+
1162
+ EXAMPLES:
1163
+ _create_pti_table_object(df = my_df, table_name = 'test_table', con = tdconnection,
1164
+ timecode_column = 'ts', columns_list = ['user_id', 'location'])
1165
+
1166
+ """
1167
+ meta = MetaData()
1168
+
1169
+ if isinstance(df, pd.DataFrame):
1170
+ col_names, col_types = _extract_column_info(df, types, index, index_label)
1171
+ timecode_datatype = col_types[col_names.index(timecode_column)]()
1172
+ else:
1173
+ col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
1174
+ if types is not None:
1175
+ # Use the user-provided type when specified; fall back to the default for columns not covered by a partial types mapping.
1176
+ col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
1177
+ timecode_datatype = df[timecode_column].type
1178
+
1179
+ # Remove the timecode and sequence columns from col_names and col_types,
1180
+ # since the required columns will be created automatically.
1181
+ if timecode_column in col_names:
1182
+ ind = col_names.index(timecode_column)
1183
+ col_names.pop(ind)
1184
+ col_types.pop(ind)
1185
+
1186
+ if sequence_column is not None and sequence_column in col_names:
1187
+ ind = col_names.index(sequence_column)
1188
+ col_names.pop(ind)
1189
+ col_types.pop(ind)
1190
+
1191
+ # Dictionary to append special flags, can be extended to add Fallback, Journalling, Log etc.
1192
+ post_params = {}
1193
+ prefix = []
1194
+ pti = post(opts=post_params)
1195
+
1196
+ # Create Table object with appropriate Primary Time Index/Prefix for volatile
1197
+ if temporary:
1198
+ pti = pti.on_commit(option='preserve')
1199
+ prefix.append('VOLATILE')
1200
+
1201
+ if not set_table:
1202
+ prefix.append('multiset')
1203
+ else:
1204
+ prefix.append('set')
1205
+
1206
+ pti = pti.primary_time_index(timecode_datatype,
1207
+ name=primary_time_index_name,
1208
+ timezero_date=timezero_date,
1209
+ timebucket_duration=timebucket_duration,
1210
+ sequenced=True if sequence_column is not None else False,
1211
+ seq_max=seq_max,
1212
+ cols=columns_list)
1213
+
1214
+ table = Table(table_name, meta,
1215
+ *(Column(col_name, col_type)
1216
+ for col_name, col_type in
1217
+ zip(col_names, col_types)),
1218
+ teradatasql_post_create=pti,
1219
+ prefixes=prefix,
1220
+ schema=schema_name
1221
+ )
1222
+
1223
+ return table
1224
+
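+ # Illustrative DDL shape for the PTI construct above (exact rendering is dialect-driven;
+ # the timecode and sequence columns are generated by the database):
+ #
+ #     CREATE MULTISET TABLE "myschema"."test_table" ("user_id" INTEGER, ...)
+ #     PRIMARY TIME INDEX (TIMESTAMP(6), DATE '2011-01-01', HOURS(2),
+ #                         COLUMNS(user_id), SEQUENCED(20000))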
1225
+
1226
+ def _rename_column(col_names, search_for, rename_to):
1227
+ """
1228
+ Internal function to rename a column in a list of columns of a Pandas DataFrame.
1229
+
1230
+ PARAMETERS:
1231
+ col_names:
1232
+ Required Argument.
1233
+ The list of column names of the Pandas DataFrame.
1234
+
1235
+ search_for:
1236
+ Required Argument.
1237
+ The column name that need to be changed/renamed.
1238
+
1239
+ rename_to:
1240
+ Required Argument.
1241
+ The column name that the 'search_for' column needs to be replaced with.
1242
+
1243
+ RETURNS:
1244
+ The list of column names with the matched column renamed.
1245
+
1246
+ EXAMPLES:
1247
+ cols = _rename_column(cols, 'col_1', 'new_col_1')
1248
+ """
1249
+ ind = col_names.index(search_for)
1250
+ col_names.pop(ind)
1251
+ col_names.insert(ind, rename_to)
1252
+
1253
+ return col_names
1254
+
1255
+
1256
+ def _rename_to_pti_columns(col_names, timecode_column, sequence_column,
1257
+ timecode_column_index=None, sequence_column_index=None):
1258
+ """
1259
+ Internal function to generate a list of renamed columns of a Pandas DataFrame to match that of the PTI table column names
1260
+ in Vantage, or revert any such changes made.
1261
+
1262
+ PARAMETERS:
1263
+ col_names:
1264
+ The list of column names of the Pandas DataFrame.
1265
+
1266
+ timecode_column:
1267
+ The column name that reflects the timecode column in the PTI table.
1268
+
1269
+ sequence_column:
1270
+ The column name that reflects the sequence column in the PTI table.
1271
+
1272
+ timecode_column_index:
1273
+ The index of the timecode column. When specified, it indicates that a reverse renaming operation is to be
1274
+ performed.
1275
+
1276
+ sequence_column_index:
1277
+ The index of the sequence column. When specified, it indicates that a reverse renaming operation is to be
1278
+ performed.
1279
+
1280
+ RETURNS:
1281
+ A list of renamed PTI related columns.
1282
+
1283
+ EXAMPLES:
1284
+ cols = _rename_to_pti_columns(cols, timecode_column, sequence_column, timecode_column_index, sequence_column_index)
1285
+ cols = _rename_to_pti_columns(cols, timecode_column, sequence_column)
1286
+ """
1287
+ # Rename the timecode_column to what it is in Vantage
1288
+ if timecode_column_index is not None:
1289
+ col_names = _rename_column(col_names, PTITableConstants.TD_TIMECODE.value, timecode_column)
1290
+ else:
1291
+ col_names = _rename_column(col_names, timecode_column, PTITableConstants.TD_TIMECODE.value)
1292
+
1293
+ # Rename the sequence_column to what it is in Vantage
1294
+ if sequence_column is not None:
1295
+ if sequence_column_index is not None:
1296
+ col_names = _rename_column(col_names, PTITableConstants.TD_SEQNO.value, sequence_column)
1297
+ else:
1298
+ col_names = _rename_column(col_names, sequence_column, PTITableConstants.TD_SEQNO.value)
1299
+
1300
+ return col_names
1301
+
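+ # For example (sketch, assuming TD_TIMECODE/TD_SEQNO are the Vantage-side names):
+ #
+ #     _rename_to_pti_columns(['ts', 'seq', 'x'], 'ts', 'seq')
+ #     # -> ['TD_TIMECODE', 'TD_SEQNO', 'x']
+ #     _rename_to_pti_columns(['TD_TIMECODE', 'TD_SEQNO', 'x'], 'ts', 'seq', 0, 1)
+ #     # -> ['ts', 'seq', 'x']   (reverse renaming)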
1302
+
1303
+ def _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column, df_col_type_list = None):
1304
+ """
1305
+ Internal function to reorder the list of columns used to construct the 'INSERT INTO'
1306
+ statement as required when the target table is a PTI table.
1307
+
1308
+ PARAMETERS:
1309
+ df_column_list:
1310
+ A list of column names for the columns in the DataFrame.
1311
+
1312
+ timecode_column:
1313
+ The timecode_columns which should be moved to the first position.
1314
+
1315
+ sequence_column:
1316
+ The timecode_columns which should be moved to the first position.
1317
+
1318
+ df_col_type_list:
1319
+ Optionally reorder the list containing the types of the columns to match the
1320
+ reordering of df_column_list.
1321
+
1322
+ RETURNS:
1323
+ A reordered list of column names for the columns in the DataFrame.
1324
+ If the optional types list is also specified, then a tuple of the reordered list of column names
1325
+ and the correspondingly reordered list of column types.
1326
+
1327
+ EXAMPLE:
1328
+ new_colname_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
1329
+ new_colname_list, new_type_list = _reorder_insert_list_for_pti(df_column_list, timecode_column,
1330
+ sequence_column, df_col_type_list)
1331
+ """
1332
+ # Reposition timecode (to the first) and sequence column (to the second)
1333
+ # in df_column_list
1334
+ timecode_column_index = df_column_list.index(timecode_column)
1335
+ df_column_list.insert(0, df_column_list.pop(timecode_column_index))
1336
+ if df_col_type_list is not None:
1337
+ df_col_type_list.insert(0, df_col_type_list.pop(timecode_column_index))
1338
+
1339
+ if sequence_column is not None:
1340
+ sequence_column_index = df_column_list.index(sequence_column)
1341
+ df_column_list.insert(1, df_column_list.pop(sequence_column_index))
1342
+ if df_col_type_list is not None:
1343
+ # Keep the type list aligned with the reordered name list: the sequence type moves to the second position.
+ df_col_type_list.insert(1, df_col_type_list.pop(sequence_column_index))
1344
+
1345
+ if df_col_type_list is not None:
1346
+ return df_column_list, df_col_type_list
1347
+ else:
1348
+ return df_column_list
1349
+
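+ # For example:
+ #
+ #     _reorder_insert_list_for_pti(['a', 'ts', 'b', 'seq'], 'ts', 'seq')
+ #     # -> ['ts', 'seq', 'a', 'b']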
1350
+
1351
+ def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_pandas_df=False,
1352
+ is_pti=False, timecode_column=None, sequence_column=None):
1353
+ """
1354
+ Internal function used to extract column information from two lists of SQLAlchemy ColumnExpression objects,
1355
+ and check whether the number of columns and their names match, to determine table insertion compatibility.
1356
+
1357
+ PARAMETERS:
1358
+ table1_col_object:
1359
+ Specifies a list/collection of SQLAlchemy ColumnExpression Objects for first table.
1360
+
1361
+ table2_cols:
1362
+ Specifies a list of column names for second table (teradataml DataFrame).
1363
+
1364
+ is_pandas_df:
1365
+ Flag specifying whether the table objects to check are pandas DataFrames or not
1366
+ Default: False
1367
+ Note: When this flag is True, table2_cols is passed as a tuple object of
1368
+ ([column_names], [column_types])
1369
+
1370
+ is_pti:
1371
+ Boolean flag indicating if the target table is a PTI table.
1372
+
1373
+ timecode_column:
1374
+ timecode_column required to order the select expression for the insert.
1375
+ It should be the first column in the select expression.
1376
+
1377
+ sequence_column:
1378
+ sequence_column required to order the select expression for the insert.
1379
+ It should be the second column in the select expression.
1380
+
1381
+
1382
+ RETURNS:
1383
+ a) True, when insertion compatible (number of columns and their names match)
1384
+ b) False, otherwise
1385
+
1386
+ RAISES:
1387
+ N/A
1388
+
1389
+ EXAMPLES:
1390
+ _check_columns_insertion_compatible(table1.c, ['col1', 'col2'], False)
1391
+ _check_columns_insertion_compatible(table1.c, (['col1', 'col2'], [int, str]), True, True, 'ts', 'seq')
1392
+
1393
+ """
1394
+ table1_col_names, _ = UtilFuncs._extract_table_object_column_info(table1_col_object)
1395
+ table2_col_names = table2_cols[0] if is_pandas_df else table2_cols
1396
+
1397
+ # Check for number of columns
1398
+ if len(table1_col_names) != len(table2_col_names):
1399
+ return False
1400
+
1401
+ if is_pti is True:
1402
+ # Reposition timecode (to the first) and sequence column (to the second)
1403
+ # with their names as generated by the database, in col_name since that
1404
+ # is the default position of the columns.
1405
+ table2_col_names = _reorder_insert_list_for_pti(table2_col_names, timecode_column, sequence_column)
1406
+ table2_col_names = _rename_to_pti_columns(table2_col_names, timecode_column, sequence_column)
1407
+
1408
+ # Check for the column names
1409
+ for i in range(len(table1_col_names)):
1410
+ if table1_col_names[i] != table2_col_names[i]:
1411
+ return False
1412
+
1413
+ # Number of columns and their names in both List of ColumnExpressions match
1414
+ return True
1415
+
1416
+
1417
+ def _extract_column_info(df, types = None, index = False, index_label = None):
1418
+ """
1419
+ This is an internal function used to extract column information for a DF,
1420
+ and map to user-specified teradatasqlalchemy types, if specified,
1421
+ for Table creation.
1422
+
1423
+ PARAMETERS:
1424
+ df:
1425
+ The Pandas DataFrame object to be saved.
1426
+
1427
+ types:
1428
+ A python dictionary with column names and required types as key-value pairs.
1429
+
1430
+ index:
1431
+ Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
1432
+
1433
+ index_label:
1434
+ Column label(s) for index column(s).
1435
+
1436
+ RETURNS:
1437
+ A tuple with the following elements:
1438
+ a) List of DataFrame Column names
1439
+ b) List of equivalent teradatasqlalchemy column types
1440
+
1441
+ RAISES:
1442
+ None
1443
+
1444
+ EXAMPLES:
1445
+ _extract_column_info(df = my_df)
1446
+ _extract_column_info(df = my_df, types = {'id_col': INTEGER})
1447
+
1448
+ """
1449
+ if isinstance(df, str):
1450
+ return list(types.keys()), list(types.values())
1451
+
1452
+ col_names = _get_pd_df_column_names(df)
1453
+
1454
+ # If a type is not specified for a column, check whether its dtype is datetime64 with a
1455
+ # timezone; if so, map it to TIMESTAMP(timezone=True), otherwise use the default mapping.
1456
+ col_types = [types.get(col_name) if types and col_name in types else
1457
+ TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes[key])
1458
+ and (df[col_name].dt.tz is not None)
1459
+ else _get_sqlalchemy_mapping_types(str(df.dtypes[key]))
1460
+ for key, col_name in enumerate(list(df.columns))]
1461
+
1462
+ ind_names = []
1463
+ ind_types = []
1464
+ if index:
1465
+ ind_names, ind_types = _get_index_labels(df, index_label)
1466
+ # Use the index level dtypes collected above; tz-aware levels map to TIMESTAMP(timezone=True).
+ ind_types = [types.get(ind_name) if types and ind_name in types
1467
+ else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(ind_types[key])
1468
+ and getattr(ind_types[key], 'tz', None) is not None
1469
+ else _get_sqlalchemy_mapping_types(str(ind_types[key]))
1470
+ for key, ind_name in enumerate(ind_names)]
1471
+
1472
+ return col_names + ind_names, col_types + ind_types
1473
+
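+ # For example (sketch): an int64 column maps to BIGINT and a tz-aware timestamp
+ # column maps to TIMESTAMP(timezone=True):
+ #
+ #     pdf = pd.DataFrame({'n': [1, 2],
+ #                         'ts': pd.to_datetime(['2020-01-01', '2020-01-02'], utc=True)})
+ #     _extract_column_info(pdf)
+ #     # -> (['n', 'ts'], [BIGINT, TIMESTAMP(timezone=True)])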
1474
+
1475
+ def _insert_from_dataframe(df, con, schema_name, table_name, index, chunksize,
1476
+ is_pti=False, timecode_column=None, sequence_column=None,
1477
+ match_column_order=True):
1478
+ """
1479
+ This is an internal function used to sequentially extract column info from DF,
1480
+ iterate rows, and insert rows manually.
1481
+ Used for insertions to temporary tables and tables saved with the Pandas index.
1482
+
1483
+ This uses the DBAPI executemany() batch insertion mechanism.
1484
+
1485
+ PARAMETERS:
1486
+ df:
1487
+ The Pandas DataFrame object to be saved.
1488
+
1489
+ con:
1490
+ A SQLAlchemy connectable (engine/connection) object
1491
+
1492
+ schema_name:
1493
+ Name of the schema.
1494
+
1495
+ table_name:
1496
+ Name of the table.
1497
+
1498
+ index:
1499
+ Flag specifying whether to write Pandas DataFrame index as a column or not.
1500
+
1501
+ chunksize:
1502
+ Specifies the number of rows to be loaded in a batch.
1503
+ Note:
1504
+ This argument is used only when argument "df" is a pandas DataFrame.
1505
+
1506
+ is_pti:
1507
+ Boolean flag indicating if the table should be a PTI table.
1508
+
1509
+ timecode_column:
1510
+ timecode_column required to order the select expression for the insert.
1511
+ It should be the first column in the select expression.
1512
+
1513
+ sequence_column:
1514
+ sequence_column required to order the select expression for the insert.
1515
+ It should be the second column in the select expression.
1516
+
1517
+ match_column_order:
1518
+ Specifies whether the column order of the df to be loaded matches the
1519
+ column order of the existing table.
1520
+
1521
+ RETURNS:
1522
+ N/A
1523
+
1524
+ RAISES:
1525
+ N/A
1526
+
1527
+ EXAMPLES:
1528
+ _insert_from_dataframe(df = my_df, con = tdconnection, schema_name = None, table_name = 'test_table',
1529
+ index = True, chunksize = 10000)
1530
+ """
1531
+ col_names = _get_pd_df_column_names(df)
1532
+
1533
+ # Quoted, schema-qualified table name
1534
+ table = '"{}"'.format(table_name)
1535
+ if schema_name is not None:
1536
+ table = '"{}".{}'.format(schema_name, table_name)
1537
+
1538
+ try:
1539
+
1540
+ if is_pti:
1541
+ # This is for non-index columns.
1542
+ col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)
1543
+
1544
+ is_multi_index = isinstance(df.index, pd.MultiIndex)
1545
+
1546
+ insert_list = []
1547
+
1548
+ if not match_column_order:
1549
+ ins = "INSERT INTO {} {} VALUES {};".format(
1550
+ table,
1551
+ '(' + ', '.join(col_names) + ')',
1552
+ '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
1553
+ if index is True else len(col_names))]) + ')')
1554
+ else:
1555
+ ins = "INSERT INTO {} VALUES {};".format(
1556
+ table,
1557
+ '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
1558
+ if index is True else len(col_names))]) + ')')
1559
+
1560
+ # Empty queryband buffer before SQL call.
1561
+ UtilFuncs._set_queryband()
1562
+ rowcount = 0
1563
+ # Iterate rows of DataFrame over new re-ordered columns
1564
+ for row_index, row in enumerate(df[col_names].itertuples(index=True)):
1565
+ ins_dict = ()
1566
+ for col_index, x in enumerate(col_names):
1567
+ ins_dict = ins_dict + (row[col_index+1],)
1568
+
1569
+ if index is True:
1570
+ ins_dict = ins_dict + row[0] if is_multi_index else ins_dict + (row[0],)
1571
+
1572
+ insert_list.append(ins_dict)
1573
+ rowcount = rowcount + 1
1574
+
1575
+ # "chunksize" caps the batch size for the DBAPI driver.
1576
+ # Insert the rows once the batch size reaches the cap.
1577
+ if rowcount == chunksize:
1578
+ # Batch Insertion (using DBAPI's executeMany) used here to insert list of dictionaries
1579
+ cur = execute_sql(ins, insert_list)
1580
+ if cur is not None:
1581
+ cur.close()
1582
+ rowcount = 0
1583
+ insert_list.clear()
1584
+
1585
+ # Insert any remaining rows.
1586
+ if rowcount > 0:
1587
+ cur = execute_sql(ins, insert_list)
1588
+ if cur is not None:
1589
+ cur.close()
1590
+
1591
+ except Exception:
1592
+ raise
1593
+
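+ # The batching above reduces to a parameterized INSERT executed over lists of row
+ # tuples; a minimal standalone sketch (hypothetical cursor and row source) using
+ # standard DBAPI executemany semantics:
+ #
+ #     ins = 'INSERT INTO "test_table" VALUES (?, ?);'
+ #     batch = []
+ #     for row in rows:                 # rows: iterable of 2-tuples
+ #         batch.append(row)
+ #         if len(batch) == chunksize:  # flush a full batch
+ #             cursor.executemany(ins, batch)
+ #             batch.clear()
+ #     if batch:                        # flush the remainder
+ #         cursor.executemany(ins, batch)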
1594
+
1595
+ def _get_pd_df_column_names(df):
1596
+ """
1597
+ Internal function to return the names of columns in a Pandas DataFrame.
1598
+
1599
+ PARAMETERS
1600
+ df:
1601
+ The Pandas DataFrame to fetch the column names for.
1602
+
1603
+ RETURNS:
1604
+ A list of Strings
1605
+
1606
+ RAISES:
1607
+ None
1608
+
1609
+ EXAMPLES:
1610
+ _get_pd_df_column_names(df = my_df)
1611
+ """
1612
+ return df.columns.tolist()
1613
+
1614
+
1615
+ def _get_sqlalchemy_mapping(key):
1616
+ """
1617
+ This is an internal function used to returns a SQLAlchemy Type Mapping
1618
+ for a given Pandas DataFrame column Type.
1619
+ Used for Table Object creation internally based on DF column info.
1620
+
1621
+ For an unknown key, String (Mapping to VARCHAR) is returned
1622
+
1623
+ PARAMETERS:
1624
+ key : String representing Pandas type ('int64', 'object' etc.)
1625
+
1626
+ RETURNS:
1627
+ SQLAlchemy Type Object(Integer, String, Float, DateTime etc.)
1628
+
1629
+ RAISES:
1630
+ N/A
1631
+
1632
+ EXAMPLES:
1633
+ _get_sqlalchemy_mapping(key = 'int64')
1634
+ """
1635
+ teradata_types_map = _get_all_sqlalchemy_mappings()
1636
+
1637
+ if key in teradata_types_map.keys():
1638
+ return teradata_types_map.get(key)
1639
+ else:
1640
+ return VARCHAR(configure.default_varchar_size,charset='UNICODE')
1641
+
1642
+
1643
+ def _get_all_sqlalchemy_mappings():
1644
+ """
1645
+ This is an internal function used to return a dictionary of all SQLAlchemy Type Mappings.
1646
+ It contains mappings from pandas data types to instances of SQLAlchemy types.
1647
+
1648
+ PARAMETERS:
1649
+
1650
+ RETURNS:
1651
+ dictionary { pandas_type : SQLAlchemy Type Object}
1652
+
1653
+ RAISES:
1654
+ N/A
1655
+
1656
+ EXAMPLES:
1657
+ _get_all_sqlalchemy_mappings()
1658
+ """
1659
+ teradata_types_map = {'int32':INTEGER(), 'int64':BIGINT(),
1660
+ 'object':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1661
+ 'O':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1662
+ 'float64':FLOAT(), 'float32':FLOAT(), 'bool':BYTEINT(),
1663
+ 'datetime64':TIMESTAMP(), 'datetime64[ns]':TIMESTAMP(),
1664
+ 'datetime64[ns, UTC]':TIMESTAMP(timezone=True),
1665
+ 'timedelta64[ns]':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
1666
+ 'timedelta[ns]':VARCHAR(configure.default_varchar_size,charset='UNICODE')}
1667
+
1668
+ return teradata_types_map
1669
+
1670
+
1671
+ def _get_sqlalchemy_mapping_types(key):
1672
+ """
1673
+ This is an internal function used to return a SQLAlchemy Type Mapping
1674
+ for a given Pandas DataFrame column Type.
1675
+ Used for Table Object creation internally based on DF column info.
1676
+
1677
+ For an unknown key, String (Mapping to VARCHAR) is returned
1678
+
1679
+ PARAMETERS:
1680
+ key : String representing Pandas type ('int64', 'object' etc.)
1681
+
1682
+ RETURNS:
1683
+ SQLAlchemy Type (Integer, String, Float, DateTime etc.)
1684
+
1685
+ RAISES:
1686
+ N/A
1687
+
1688
+ EXAMPLES:
1689
+ _get_sqlalchemy_mapping_types(key = 'int64')
1690
+ """
1691
+ teradata_types_map = _get_all_sqlalchemy_types_mapping()
1692
+
1693
+ if key in teradata_types_map.keys():
1694
+ return teradata_types_map.get(key)
1695
+ else:
1696
+ return VARCHAR(configure.default_varchar_size,charset='UNICODE')
1697
+
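+ # Note the distinction between the two lookup helpers (sketch):
+ #
+ #     _get_sqlalchemy_mapping('int64')        # -> BIGINT()  (a type instance)
+ #     _get_sqlalchemy_mapping_types('int64')  # -> BIGINT    (the type class)
+ #     _get_sqlalchemy_mapping_types('uint8')  # -> VARCHAR fallback for unknown keys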
1698
+
1699
+ def _get_all_sqlalchemy_types_mapping():
1700
+ """
1701
+ This is an internal function used to return a dictionary of all SQLAlchemy Type Mappings.
1702
+ It contains mappings from pandas data types to SQLAlchemy types (classes, or instances for parameterized types).
1703
+
1704
+ PARAMETERS:
1705
+
1706
+ RETURNS:
1707
+ dictionary { pandas_type : SQLAlchemy Type}
1708
+
1709
+ RAISES:
1710
+ N/A
1711
+
1712
+ EXAMPLES:
1713
+ _get_all_sqlalchemy_types_mapping()
1714
+ """
1715
+ teradata_types_map = {'int32': INTEGER, 'int64': BIGINT,
1716
+ 'object': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
1717
+ 'O': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
1718
+ 'float64': FLOAT, 'float32': FLOAT, 'bool': BYTEINT,
1719
+ 'datetime64': TIMESTAMP, 'datetime64[ns]': TIMESTAMP,
1720
+ 'datetime64[ns, UTC]': TIMESTAMP(timezone=True),
1721
+ 'timedelta64[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
1722
+ 'timedelta[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE')}
1723
+
1724
+ return teradata_types_map
1725
+
1726
+
1727
+ def _validate_timezero_date(timezero_date):
1728
+ """
1729
+ Internal function to validate timezero_date specified when creating a
1730
+ Primary Time Index (PTI) table.
1731
+
1732
+ PARAMETERS:
1733
+ timezero_date:
1734
+ The timezero_date passed to primary_time_index().
1735
+
1736
+ RETURNS:
1737
+ True if the value is valid.
1738
+
1739
+ RAISES:
1740
+ TeradataMlException when the value is invalid.
1741
+
1742
+ EXAMPLE:
1743
+ _validate_timezero_date("DATE '2011-01-01'")
1744
+ _validate_timezero_date('2011-01-01') # Invalid
1745
+ """
1746
+ # Return True if it is not specified (None), since it is optional.
1747
+ if timezero_date is None:
1748
+ return True
1749
+
1750
+ pattern = re.compile(PTITableConstants.PATTERN_TIMEZERO_DATE.value)
1751
+ match = pattern.match(timezero_date)
1752
+
1753
+ err_msg = Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(timezero_date,
1754
+ 'timezero_date',
1755
+ "str of format DATE 'YYYY-MM-DD'")
1756
+
1757
+ try:
1758
+ datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
1759
+ except (ValueError, AttributeError):
1760
+ raise TeradataMlException(err_msg,
1761
+ MessageCodes.INVALID_ARG_VALUE)
1762
+
1763
+ # Looks like the value is valid
1764
+ return True
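+
+ # e.g. (sketch): the regex shape-check is followed by a strptime sanity check:
+ #
+ #     _validate_timezero_date("DATE '2011-01-01'")   # -> True
+ #     _validate_timezero_date("DATE '2011-13-01'")   # raises: month 13 fails strptime
+ #     _validate_timezero_date('2011-01-01')          # raises: format must be DATE 'YYYY-MM-DD'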