teradataml 20.0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1208)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,2446 @@
1
+ #!/usr/bin/python
2
+ # ##################################################################
3
+ #
4
+ # Copyright 2018 Teradata. All rights reserved.
5
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
6
+ #
7
+ # ##################################################################
8
+
9
+ import re
10
+ import datetime
11
+ import warnings
12
+ import pandas as pd
13
+ import pandas.api.types as pt
14
+
15
+ from sqlalchemy import MetaData, Table, Column
16
+ from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
17
+ from teradataml.dataframe.sql import ColumnExpression
18
+ from teradatasqlalchemy import (INTEGER, BIGINT, BYTEINT, FLOAT)
19
+ from teradatasqlalchemy import (TIMESTAMP, DATE)
20
+ from teradatasqlalchemy import (VARCHAR)
21
+ from teradatasqlalchemy import (PERIOD_DATE,PERIOD_TIMESTAMP)
22
+ from teradatasqlalchemy.dialect import TDCreateTablePost as post
23
+ from teradataml.common.aed_utils import AedUtils
24
+ from teradataml.context.context import *
25
+ from teradataml.dataframe import dataframe as tdmldf
26
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
27
+ from teradataml.dbutils.dbutils import _rename_table
28
+ from teradataml.common.utils import UtilFuncs
29
+ from teradataml.options.configure import configure
30
+ from teradataml.common.constants import CopyToConstants, PTITableConstants, TeradataTypes
31
+ from teradatasql import OperationalError
32
+ from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
33
+ from teradataml.utils.utils import execute_sql
34
+ from teradataml.utils.validators import _Validators
35
+ from teradataml.telemetry_utils.queryband import collect_queryband
36
+ from teradatasqlalchemy.dialect import dialect as td_dialect
37
+
38
+ from teradataml.utils.dtypes import _TupleOf
39
+
40
+ @collect_queryband(queryband="CpToSql")
41
+ def copy_to_sql(df, table_name,
42
+ schema_name=None, if_exists='append',
43
+ index=False, index_label=None,
44
+ primary_index=None,
45
+ temporary=False, types = None,
46
+ primary_time_index_name = None,
47
+ timecode_column=None,
48
+ timebucket_duration = None,
49
+ timezero_date = None,
50
+ columns_list=None,
51
+ sequence_column=None,
52
+ seq_max=None,
53
+ set_table=False,
54
+ chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
55
+ match_column_order=True,
56
+ partition_by=None,
57
+ partition_by_case=None,
58
+ partition_by_range=None,
59
+ sub_partition=None,
60
+ **kwargs):
61
+ """
62
+ Writes records stored in a Pandas DataFrame or a teradataml DataFrame to Teradata Vantage.
63
+
64
+ PARAMETERS:
65
+
66
+ df:
67
+ Required Argument.
68
+ Specifies the Pandas or teradataml DataFrame object to be saved.
69
+ Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
70
+
71
+ table_name:
72
+ Required Argument.
73
+ Specifies the name of the table to be created in Vantage.
74
+ Types : String
75
+
76
+ schema_name:
77
+ Optional Argument.
78
+ Specifies the name of the SQL schema in Teradata Vantage to write to.
79
+ Types: String
80
+ Default: None (Uses default database schema).
81
+
82
+ Note: schema_name will be ignored when temporary=True.
83
+
84
+ if_exists:
85
+ Optional Argument.
86
+ Specifies the action to take when table already exists in Vantage.
87
+ Types: String
88
+ Possible values: {'fail', 'replace', 'append'}
89
+ - fail: If table exists, do nothing.
90
+ - replace: If table exists, drop it, recreate it, and insert data.
91
+ - append: If table exists, insert data. Create if does not exist.
92
+ Default : append
93
+
94
+ Note: Replacing a table with the contents of a teradataml DataFrame based on
95
+ the same underlying table is not supported.
96
+
97
+ index:
98
+ Optional Argument.
99
+ Specifies whether to save Pandas DataFrame index as a column or not.
100
+ Types : Boolean (True or False)
101
+ Default : False
102
+
103
+ Note: Only use as True when attempting to save Pandas DataFrames (and not with teradataml DataFrames).
104
+
105
+ index_label:
106
+ Optional Argument.
107
+ Specifies the column label(s) for Pandas DataFrame index column(s).
108
+ Types : String or list of strings
109
+ Default : None
110
+
111
+ Note: If index_label is not specified (defaulted to None or is empty) and `index` is True, then
112
+ the 'names' property of the DataFrames index is used as the label(s),
113
+ and if that too is None or empty, then:
114
+ 1) a default label 'index_label' or 'level_0' (when 'index_label' is already taken) is used
115
+ when index is standard.
116
+ 2) default labels 'level_0', 'level_1', etc. are used when index is multi-level index.
117
+
118
+ Only use as True when attempting to save Pandas DataFrames (and not on teradataml DataFrames).
119
+
120
+ primary_index:
121
+ Optional Argument.
122
+ Specifies which column(s) to use as primary index while creating Teradata table(s) in Vantage.
123
+ When None, No Primary Index Teradata tables are created.
124
+ Types : String or list of strings
125
+ Default : None
126
+ Example:
127
+ primary_index = 'my_primary_index'
128
+ primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
129
+
130
+ temporary:
131
+ Optional Argument.
132
+ Specifies whether to create Vantage tables as permanent or volatile.
133
+ Types : Boolean (True or False)
134
+ Default : False
135
+
136
+ Note: When True:
137
+ 1. volatile Tables are created, and
138
+ 2. schema_name is ignored.
139
+ When False, permanent tables are created.
140
+
141
+ types:
142
+ Optional Argument.
143
+ Specifies required data types for requested columns to be saved in Teradata Vantage.
144
+ Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
145
+ Default: None
146
+
147
+ Note:
148
+ 1. This argument accepts a dictionary of columns names and their required teradatasqlalchemy types
149
+ as key-value pairs, allowing to specify a subset of the columns of a specific type.
150
+ i) When the input is a Pandas DataFrame:
151
+ - When only a subset of all columns are provided, the column types for the rest are assigned
152
+ appropriately.
153
+ - When types argument is not provided, the column types are assigned
154
+ as listed in the following table:
155
+ +---------------------------+-----------------------------------------+
156
+ | Pandas/Numpy Type | teradatasqlalchemy Type |
157
+ +---------------------------+-----------------------------------------+
158
+ | int32 | INTEGER |
159
+ +---------------------------+-----------------------------------------+
160
+ | int64 | BIGINT |
161
+ +---------------------------+-----------------------------------------+
162
+ | bool | BYTEINT |
163
+ +---------------------------+-----------------------------------------+
164
+ | float32/float64 | FLOAT |
165
+ +---------------------------+-----------------------------------------+
166
+ | datetime64/datetime64[ns] | TIMESTAMP |
167
+ +---------------------------+-----------------------------------------+
168
+ | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True) |
169
+ +---------------------------+-----------------------------------------+
170
+ | Any other data type | VARCHAR(configure.default_varchar_size) |
171
+ +---------------------------+-----------------------------------------+
172
+ ii) When the input is a teradataml DataFrame:
173
+ - When only a subset of all columns are provided, the column types for the rest are retained.
174
+ - When types argument is not provided, the column types are retained.
175
+ 2. This argument does not have any effect when the table specified using table_name and schema_name
176
+ exists and if_exists = 'append'.
177
+
178
+ primary_time_index_name:
179
+ Optional Argument.
180
+ Specifies a name for the Primary Time Index (PTI) when the table
181
+ to be created must be a PTI table.
182
+ Type: String
183
+
184
+ Note: This argument is not required or used when the table to be created
185
+ is not a PTI table. It will be ignored if specified without the timecode_column.
186
+
187
+ timecode_column:
188
+ Optional argument.
189
+ Required when the DataFrame must be saved as a PTI table.
190
+ Specifies the column in the DataFrame that reflects the form
191
+ of the timestamp data in the time series.
192
+ This column will be the TD_TIMECODE column in the table created.
193
+ It should be of SQL type TIMESTAMP(n), TIMESTAMP(n) WITH TIMEZONE, or DATE,
194
+ corresponding to Python types datetime.datetime or datetime.date, or Pandas dtype datetime64[ns].
195
+ Type: String
196
+
197
+ Note: When you specify this parameter, an attempt to create a PTI table
198
+ will be made. This argument is not required when the table to be created
199
+ is not a PTI table. If this argument is specified, primary_index will be ignored.
200
+
201
+ timezero_date:
202
+ Optional Argument.
203
+ Used when the DataFrame must be saved as a PTI table.
204
+ Specifies the earliest time series data that the PTI table will accept;
205
+ a date that precedes the earliest date in the time series data.
206
+ Value specified must be of the following format: DATE 'YYYY-MM-DD'
207
+ Default Value: DATE '1970-01-01'.
208
+ Type: String
209
+
210
+ Note: This argument is not required or used when the table to be created
211
+ is not a PTI table. It will be ignored if specified without the timecode_column.
212
+
213
+ timebucket_duration:
214
+ Optional Argument.
215
+ Required if columns_list is not specified or is None.
216
+ Used when the DataFrame must be saved as a PTI table.
217
+ Specifies a duration that serves to break up the time continuum in
218
+ the time series data into discrete groups or buckets.
219
+ Specified using the formal form time_unit(n), where n is a positive
220
+ integer, and time_unit can be any of the following:
221
+ CAL_YEARS, CAL_MONTHS, CAL_DAYS, WEEKS, DAYS, HOURS, MINUTES,
222
+ SECONDS, MILLISECONDS, or MICROSECONDS.
223
+ Type: String
224
+
225
+ Note: This argument is not required or used when the table to be created
226
+ is not a PTI table. It will be ignored if specified without the timecode_column.
227
+
228
+ columns_list:
229
+ Optional Argument.
230
+ Used when the DataFrame must be saved as a PTI table.
231
+ Required if timebucket_duration is not specified.
232
+ A list of one or more PTI table column names.
233
+ Type: String or list of Strings
234
+
235
+ Note: This argument is not required or used when the table to be created
236
+ is not a PTI table. It will be ignored if specified without the timecode_column.
237
+
238
+ sequence_column:
239
+ Optional Argument.
240
+ Used when the DataFrame must be saved as a PTI table.
241
+ Specifies the column of type Integer containing the unique identifier for
242
+ time series data readings when they are not unique in time.
243
+ * When specified, implies SEQUENCED, meaning more than one reading from the same
244
+ sensor may have the same timestamp.
245
+ This column will be the TD_SEQNO column in the table created.
246
+ * When not specified, implies NONSEQUENCED, meaning there is only one sensor reading
247
+ per timestamp.
248
+ This is the default.
249
+ Type: str
250
+
251
+ Note: This argument is not required or used when the table to be created
252
+ is not a PTI table. It will be ignored if specified without the timecode_column.
253
+
254
+ seq_max:
255
+ Optional Argument.
256
+ Used when the DataFrame must be saved as a PTI table.
257
+ Specifies the maximum number of sensor data rows that can have the
258
+ same timestamp. Can be used when 'sequenced' is True.
259
+ Accepted range: 1 - 2147483647.
260
+ Default Value: 20000.
261
+ Type: int
262
+
263
+ Note: This argument is not required or used when the table to be created
264
+ is not a PTI table. It will be ignored if specified without the timecode_column.
265
+
266
+ set_table:
267
+ Optional Argument.
268
+ Specifies a flag to determine whether to create a SET or a MULTISET table.
269
+ When True, a SET table is created.
270
+ When False, a MULTISET table is created.
271
+ Default Value: False
272
+ Type: boolean
273
+
274
+ Note: 1. Specifying set_table=True also requires specifying primary_index or timecode_column.
275
+ 2. Creating SET table (set_table=True) may result in
276
+ a. an error if the source is a Pandas DataFrame having duplicate rows.
277
+ b. loss of duplicate rows if the source is a teradataml DataFrame.
278
+ 3. This argument has no effect if the table already exists and if_exists='append'.
279
+
280
+ chunksize:
281
+ Optional Argument.
282
+ Specifies the number of rows to be loaded in a batch.
283
+ Note:
284
+ This argument is used only when argument "df" is a pandas DataFrame.
285
+ Default Value: 16383
286
+ Types: int
287
+
288
+ match_column_order:
289
+ Optional Argument.
290
+ Specifies whether the order of the columns in existing table matches the order of
291
+ the columns in the "df" or not. When set to False, the dataframe to be loaded can
292
+ have any order and number of columns.
293
+ Default Value: True
294
+ Types: bool
295
+
296
+ partition_by:
297
+ Optional Argument.
298
+ Specifies the columns on which partition should be created while creating the table.
299
+ Note:
300
+ 1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
301
+ 2. "primary_index" should be specified when "partition_by" is used.
302
+ 3. Not applicable for PTI tables.
303
+ Types: str or ColumnExpression
304
+
305
+ partition_by_case:
306
+ Optional Argument.
307
+ Specifies different cases to partition the index while creating table.
308
+ Note:
309
+ 1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
310
+ 2. "primary_index" should be specified when "partition_by_case" is used.
311
+ 3. Not applicable for PTI tables.
312
+ Types: str or ColumnExpression or tuple of ColumnExpression, str
313
+
314
+ partition_by_range:
315
+ Optional Argument.
316
+ Specifies the range of values on which partition should be created while creating a table.
317
+ Note:
318
+ 1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
319
+ 2. "primary_index" should be specified when "partition_by_range" is used.
320
+ 3. Not applicable for PTI tables.
321
+ Types: str or ColumnExpression
322
+
323
+ sub_partition:
324
+ Optional Argument.
325
+ Specifies the details to subpartition the main partition according to the value provided while creating the table.
326
+ Note:
327
+ 1. "sub_partition" is applicable only when "partition_by_range" is specified.
328
+ 2. Not applicable for PTI tables.
329
+ Types: int or Teradata Interval datatypes
330
+
331
+ **kwargs:
332
+ Optional keyword arguments.
333
+
334
+ valid_time_columns:
335
+ Optional Argument.
336
+ Specifies the name(s) of the valid time columns to be referred in "df".
337
+ When "valid_time_columns" is specified, then function considers
338
+ these columns as valid time dimension columns and creates a
339
+ valid time dimension temporal table if table does not exist.
340
+ Notes:
341
+ * If a string is provided, the column must be of PERIOD type.
342
+ Types: tuple of strings or str
343
+
344
+ derived_column:
345
+ Optional Argument.
346
+ Specifies the name of the derived column to be kept in the temporal table.
347
+ Notes:
348
+ * Argument is ignored if "valid_time_columns" are not specified.
349
+ * Argument is considered only if copy_to_sql() is creating a table.
350
+ * If "valid_time_columns" is specified and "derived_column" is not specified,
351
+ then copy_to_sql() automatically creates a derived column by adding "_" between
352
+ the columns mentioned in "valid_time_columns". For example,
353
+ if "valid_time_columns" is ('col1', 'col2') and "derived_column"
354
+ is not specified, then copy_to_sql() creates table with
355
+ derived column name as 'col1_col2'.
356
+ Types: str
357
+
358
+ RETURNS:
359
+ None
360
+
361
+ RAISES:
362
+ TeradataMlException
363
+
364
+ EXAMPLES:
365
+ 1. Saving a Pandas DataFrame:
366
+
367
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
368
+ >>> from teradatasqlalchemy.types import *
369
+
370
+ >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
371
+ ... 'emp_sage': [100, 200, 300, 400],
372
+ ... 'emp_id': [133, 144, 155, 177],
373
+ ... 'marks': [99.99, 97.32, 94.67, 91.00]
374
+ ... }
375
+
376
+ >>> pandas_df = pd.DataFrame(df)
377
+
378
+ a) Save a Pandas DataFrame using a dataframe & table name only:
379
+ >>> copy_to_sql(df=pandas_df, table_name='my_table')
380
+
381
+ b) Saving as a SET table
382
+ >>> copy_to_sql(df=pandas_df, table_name='my_set_table', index=True,
383
+ primary_index='index_label', set_table=True)
384
+
385
+ c) Save a Pandas DataFrame by specifying additional parameters:
386
+ >>> copy_to_sql(df=pandas_df, table_name='my_table_2', schema_name='alice',
387
+ ... index=True, index_label='my_index_label', temporary=False,
388
+ ... primary_index=['emp_id'], if_exists='append',
389
+ ... types={'emp_name': VARCHAR, 'emp_sage':INTEGER,
390
+ ... 'emp_id': BIGINT, 'marks': DECIMAL})
391
+
392
+ d) Saving with additional parameters as a SET table
393
+ >>> copy_to_sql(df=pandas_df, table_name='my_table_3', schema_name='alice',
394
+ ... index=True, index_label='my_index_label', temporary=False,
395
+ ... primary_index=['emp_id'], if_exists='append',
396
+ ... types={'emp_name': VARCHAR, 'emp_sage':INTEGER,
397
+ ... 'emp_id': BIGINT, 'marks': DECIMAL},
398
+ ... set_table=True)
399
+
400
+ e) Saving levels in index of type MultiIndex
401
+ >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
402
+ >>> copy_to_sql(df=pandas_df, table_name='my_table_4', schema_name='alice',
403
+ ... index=True, index_label=['index1', 'index2'], temporary=False,
404
+ ... primary_index=['index1'], if_exists = 'replace')
405
+
406
+ f) Save a Pandas DataFrame with VECTOR datatype:
407
+ >>> import pandas as pd
408
+ >>> VECTOR_data = {
409
+ ... 'id': [10, 11, 12, 13],
410
+ ... 'array_col': ['1,1', '2,2', '3,3', '4,4']
411
+ ... }
412
+ >>> df = pd.DataFrame(VECTOR_data)
413
+
414
+ >>> from teradatasqlalchemy import VECTOR
415
+ >>> copy_to_sql(df=df, table_name='my_vector_table', types={'array_col': VECTOR})
416
+
417
+ g) Saving pandas DataFrame with partition_by:
418
+ >>> copy_to_sql(df=pandas_df, table_name='my_table_5', if_exists='replace',
419
+ ... primary_index=['emp_id'],
420
+ ... partition_by='emp_id')
421
+
422
+ h) Saving pandas DataFrame with partition_by_case:
423
+ >>> copy_to_sql(df=pandas_df, table_name='my_table_6', if_exists='replace',
424
+ ... primary_index=['emp_id'],
425
+ ... partition_by_case='emp_id > 100, emp_id < 500')
426
+
427
+ i) Saving pandas DataFrame with partition_by_range:
428
+ >>> copy_to_sql(df=pandas_df, table_name='my_table_7', if_exists='replace',
429
+ ... primary_index=['emp_id'],
430
+ ... partition_by_range='emp_id BETWEEN 100 AND 500')
431
+
432
+
433
+ j) Save a Pandas DataFrame with valid time columns of DATE type to a temporal table.
434
+ >>> import pandas as pd
435
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
436
+ >>> df = pd.DataFrame({
437
+ ... 'id': [1, 2, 3],
438
+ ... 'start_date': pd.to_datetime(['2024-01-01', '2024-02-01', '2024-03-01']).date,
439
+ ... 'end_date': pd.to_datetime(['2024-01-10', '2024-02-10', '2024-03-10']).date,
440
+ ... 'description': ['a', 'b', 'c']
441
+ ... })
442
+ >>> copy_to_sql(
443
+ ... df=df,
444
+ ... table_name='temporal_table_pandas_date',
445
+ ... valid_time_columns=('start_date', 'end_date')
446
+ ... )
447
+
448
+ k) Save a Pandas DataFrame with valid time columns of TIMESTAMP type
449
+ to a temporal table. Name the derived column as `valid_time`.
450
+ >>> import pandas as pd
451
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
452
+ >>> df = pd.DataFrame({
453
+ ... 'id': [1, 2, 3],
454
+ ... 'start_time': pd.to_datetime(['2024-01-01 10:00:00', '2024-02-01 11:00:00', '2024-03-01 12:00:00']),
455
+ ... 'end_time': pd.to_datetime(['2024-01-01 12:00:00', '2024-02-01 13:00:00', '2024-03-01 14:00:00']),
456
+ ... 'description': ['a', 'b', 'c']
457
+ ... })
458
+ >>> copy_to_sql(
459
+ ... df=df,
460
+ ... table_name='temporal_table_pandas_timestamp',
461
+ ... valid_time_columns=('start_time', 'end_time'),
462
+ ... derived_column='valid_time'
463
+ ... )
464
+
465
+ l) Save a teradataml DataFrame with a valid time column of PERIOD type to a temporal table.
466
+ >>> from teradataml.dataframe.dataframe import DataFrame
467
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
468
+ >>> from teradataml.data.load_example_data import load_example_data
469
+ >>> load_example_data("teradataml", "Employee_roles")
470
+ >>> from teradatasqlalchemy.types import PERIOD_DATE
471
+ >>> df = DataFrame('Employee_roles')
472
+ >>> copy_to_sql(
473
+ ... df,
474
+ ... table_name = 'employee_roles_temporal',
475
+ ... valid_time_columns='role_validity_period',
476
+ ... types={'role_validity_period':PERIOD_DATE}
477
+ ... )
478
+
479
+ 2. Saving a teradataml DataFrame:
480
+
481
+ >>> from teradataml.dataframe.dataframe import DataFrame
482
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
483
+ >>> from teradatasqlalchemy.types import *
484
+ >>> from teradataml.data.load_example_data import load_example_data
485
+
486
+ >>> # Load the data to run the example.
487
+ >>> load_example_data("glm", "admissions_train")
488
+
489
+ >>> # Create teradataml DataFrame(s)
490
+ >>> df = DataFrame('admissions_train')
491
+ >>> df2 = df.select(['gpa', 'masters'])
492
+
493
+ a) Save a teradataml DataFrame by using only a table name:
494
+ >>> df2.to_sql('my_tdml_table')
495
+
496
+ b) Save a teradataml DataFrame by using additional parameters:
497
+ >>> df2.to_sql(table_name = 'my_tdml_table', if_exists='append',
498
+ primary_index = ['gpa'], temporary=False, schema_name='alice')
499
+
500
+ c) Alternatively, save a teradataml DataFrame by using copy_to_sql:
501
+ >>> copy_to_sql(df2, 'my_tdml_table_2')
502
+
503
+ d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
504
+ >>> copy_to_sql(df=df2, table_name='my_tdml_table_3', schema_name='alice',
505
+ ... temporary=False, primary_index=None, if_exists='append',
506
+ ... types={'masters': VARCHAR, 'gpa':INTEGER})
507
+
508
+ e) Saving as a SET table
509
+ >>> copy_to_sql(df = df2, table_name = 'my_tdml_set_table', schema_name='alice',
510
+ ... temporary=False, primary_index=['gpa'], if_exists='append',
511
+ ... types={'masters': VARCHAR, 'gpa':INTEGER}, set_table = True)
512
+
513
+ f) Saving a teradataml DataFrame into a table by partitioning the table with column 'gpa':
514
+ >>> copy_to_sql(df=df, table_name='my_tdml_table_4', if_exists='replace',
515
+ ... primary_index=['gpa'],
516
+ ... partition_by=df.gpa)
517
+
518
+ g) Saving a teradataml DataFrame into a table with two partitions as below:
519
+ >>> copy_to_sql(df=df, table_name='my_tdml_table_5', if_exists='replace',
520
+ ... primary_index=['id'],
521
+ ... partition_by_case=(df.id < 100, df.gpa < 5.0))
522
+
523
+ h) Saving a teradataml DataFrame into a table by partitioning the table with different ranges:
524
+ >>> copy_to_sql(df=df, table_name='my_tdml_table_6', if_exists='replace',
525
+ ... primary_index=['id'],
526
+ ... partition_by_range=df.id.between(1, 100))
527
+
528
+ i) Saving a teradataml DataFrame into a table by partitioning the table with different ranges.
529
+ Also sub-partitioning based on INTERVAL:
530
+ >>> load_example_data("dataframe", "sales")
531
+ >>> df = DataFrame('sales')
532
+ >>> from teradatasqlalchemy import INTERVAL_DAY
533
+ >>> copy_to_sql(df=df, table_name='my_tdml_table_7', if_exists='replace',
534
+ ... primary_index="Feb",
535
+ ... partition_by_range=df.datetime.between('2017-01-01', '2017-01-31'),
536
+ ... sub_partition=INTERVAL_DAY(1))
537
+
538
+ j) Save a teradataml DataFrame with valid time columns of DATE type to a temporal table.
539
+ pdf = pd.DataFrame({
540
+ ... 'id': [1, 2, 3],
541
+ ... 'start_date': pd.to_datetime(['2024-01-01', '2024-02-01', '2024-03-01']).date,
542
+ ... 'end_date': pd.to_datetime(['2024-01-10', '2024-02-10', '2024-03-10']).date,
543
+ ... 'description': ['a', 'b', 'c']
544
+ ... })
545
+ >>> df_temporal = DataFrame(data = pdf)
546
+ >>> copy_to_sql(df=df_temporal, table_name='temporal_table_tdml_date',
547
+ ... valid_time_columns=('start_date', 'end_date'))
548
+
549
+ k) Save a teradataml DataFrame with valid time columns of TIMESTAMP type
550
+ to a temporal table. Name the derived column as `validity_period`.
551
+ >>> df_temporal_ts = DataFrame(data = pd.DataFrame({
552
+ ... 'id': [1, 2, 3],
553
+ ... 'start_time': pd.to_datetime(['2024-01-01 10:00:00', '2024-02-01 11:00:00', '2024-03-01 12:00:00']),
554
+ ... 'end_time': pd.to_datetime(['2024-01-01 12:00:00', '2024-02-01 13:00:00', '2024-03-01 14:00:00']),
555
+ ... 'description': ['a', 'b', 'c']
556
+ ... }))
557
+ >>> copy_to_sql(df=df_temporal_ts, table_name='temporal_table_tdml_timestamp',
558
+ ... valid_time_columns=('start_time', 'end_time'), derived_column='validity_period')
559
+
560
+
561
+ 3. Saving a teradataml DataFrame as a PTI table:
562
+
563
+ >>> from teradataml.dataframe.dataframe import DataFrame
564
+ >>> from teradataml.dataframe.copy_to import copy_to_sql
565
+ >>> from teradataml.data.load_example_data import load_example_data
566
+
567
+ >>> load_example_data("sessionize", "sessionize_table")
568
+ >>> df3 = DataFrame('sessionize_table')
569
+
570
+ a) Using copy_to_sql
571
+ >>> copy_to_sql(df3, "test_copyto_pti",
572
+ ... timecode_column='clicktime',
573
+ ... columns_list='event')
574
+
575
+ b) Alternatively, using DataFrame.to_sql
576
+ >>> df3.to_sql(table_name = "test_copyto_pti_1",
577
+ ... timecode_column='clicktime',
578
+ ... columns_list='event')
579
+
580
+ c) Saving as a SET table
581
+ >>> copy_to_sql(df3, "test_copyto_pti_2",
582
+ ... timecode_column='clicktime',
583
+ ... columns_list='event',
584
+ ... set_table=True)
585
+
586
+ """
587
+ # Accept valid_time_columns and derived_column from kwargs
588
+ valid_time_columns = kwargs.get("valid_time_columns", None)
589
+ derived_column = kwargs.get("derived_column", None)
590
+
591
+ # Deriving global connection using get_connection().
592
+ con = get_connection()
593
+
594
+ try:
595
+ if con is None:
596
+ raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE), MessageCodes.CONNECTION_FAILURE)
597
+
598
+ # Check if the table to be created must be a Primary Time Index (PTI) table.
599
+ # If a user specifies the timecode_column parameter, and attempt to create
600
+ # a PTI will be made.
601
+ is_pti = False
602
+ if timecode_column is not None:
603
+ is_pti = True
604
+ if primary_index is not None:
605
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
606
+ 'primary_index',
607
+ 'timecode_column',
608
+ 'specified'), stacklevel=2)
609
+ else:
610
+ ignored = []
611
+ if timezero_date is not None: ignored.append('timezero_date')
612
+ if timebucket_duration is not None: ignored.append('timebucket_duration')
613
+ if sequence_column is not None: ignored.append('sequence_column')
614
+ if seq_max is not None: ignored.append('seq_max')
615
+ if columns_list is not None and (
616
+ not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
617
+ if primary_time_index_name is not None: ignored.append('primary_time_index_name')
618
+ if len(ignored) > 0:
619
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
620
+ ignored,
621
+ 'timecode_column',
622
+ 'missing'), stacklevel=2)
623
+
624
+ # Unset schema_name when temporary is True since volatile tables are always in the user database
625
+ if temporary is True:
626
+ if schema_name is not None:
627
+ warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
628
+ 'schema_name',
629
+ 'temporary=True',
630
+ 'specified'), stacklevel=2)
631
+ schema_name = None
632
+
633
+ # Validate DataFrame & related flags; Proceed only when True
634
+ from teradataml.dataframe.data_transfer import _DataTransferUtils
635
+ dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name,
636
+ if_exists=if_exists, index=index, index_label=index_label,
637
+ primary_index=primary_index, temporary=temporary,
638
+ types=types, primary_time_index_name=primary_time_index_name,
639
+ timecode_column=timecode_column,
640
+ timebucket_duration=timebucket_duration,
641
+ timezero_date=timezero_date, columns_list=columns_list,
642
+ sequence_column=sequence_column, seq_max=seq_max,
643
+ set_table=set_table, api_name='copy_to',
644
+ chunksize=chunksize, match_column_order=match_column_order)
645
+
646
+ dt_obj._validate()
647
+
648
+ # Validate partition arguments
649
+ _validate_partition_arguments(partition_by=partition_by,
650
+ partition_by_case=partition_by_case,
651
+ partition_by_range=partition_by_range,
652
+ sub_partition=sub_partition)
653
+
654
+ # If the table created must be a PTI table, then validate additional parameters
655
+ # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
656
+ # will be ignored - for example, primary_index
657
+ if is_pti:
658
+ _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
659
+ timezero_date, primary_time_index_name, columns_list,
660
+ sequence_column, seq_max, types, index, index_label)
661
+
662
+ # A table cannot be a SET table and have NO PRIMARY INDEX
663
+ if set_table and primary_index is None and timecode_column is None:
664
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_NO_PI),
665
+ MessageCodes.SET_TABLE_NO_PI)
666
+
667
+ # Check whether valid time columns are passed to consider it as temporal table.
668
+ is_temporal = False
669
+ if valid_time_columns is not None:
670
+ _validate_valid_time_columns(df, valid_time_columns, derived_column,types)
671
+ is_temporal = True
672
+
673
+
674
+ # Check if destination table exists
675
+ table_exists = dt_obj._table_exists(con)
676
+
677
+ # Raise an exception when the table exists and if_exists = 'fail'
678
+ dt_obj._check_table_exists(is_table_exists=table_exists)
679
+
680
+ # Is the input DataFrame a Pandas DataFrame?
681
+ is_pandas_df = isinstance(df, pd.DataFrame)
682
+
683
+ # Let's also execute the node and set the table_name when df is teradataml DataFrame
684
+ if not is_pandas_df and df._table_name is None:
685
+ df._table_name = df_utils._execute_node_return_db_object_name(df._nodeid, df._metaexpr)
686
+
687
+ # Check table name conflict is present.
688
+ is_conflict = _check_table_name_conflict(df, table_name) if isinstance(df, tdmldf.DataFrame) and \
689
+ if_exists.lower() == 'replace' else False
690
+
691
+ # Create a temporary table name, When table name conflict is present.
692
+ if is_conflict:
693
+ # Store actual destination table name for later use.
694
+ dest_table_name = table_name
695
+ table_name = UtilFuncs._generate_temp_table_name(prefix=table_name,
696
+ table_type=TeradataConstants.TERADATA_TABLE,
697
+ quote=False)
698
+
699
+ # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
700
+ # table name in fully qualified format. Because of this , test cases started
701
+ # failing with Blank name in quotation mark. Hence, extracted only the table name.
702
+ table_name = UtilFuncs._extract_table_name(table_name)
703
+
704
+ partition_exp, partition_func = _build_partition_expression(partition_by=partition_by,
705
+ partition_by_case=partition_by_case,
706
+ partition_by_range=partition_by_range,
707
+ sub_partition=sub_partition)
708
+
709
+ # Let's create the SQLAlchemy table object to recreate the table
710
+ if not table_exists or if_exists.lower() == 'replace':
711
+ if is_temporal:
712
+ _create_temporal_table(df, table_name, con, primary_index,
713
+ schema_name, valid_time_columns, derived_column,
714
+ types, None if not is_pandas_df else index,
715
+ None if not is_pandas_df else index_label)
716
+ else:
717
+ if is_pti:
718
+ table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
719
+ primary_time_index_name, timecode_column, timezero_date,
720
+ timebucket_duration, sequence_column, seq_max,
721
+ columns_list, set_table, types,
722
+ None if not is_pandas_df else index,
723
+ None if not is_pandas_df else index_label)
724
+ else:
725
+ table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
726
+ types, None if not is_pandas_df else index,
727
+ None if not is_pandas_df else index_label,
728
+ partition_expression=partition_exp,
729
+ partition_function=partition_func
730
+ )
731
+
732
+ if table is not None:
733
+ # If the table need to be replaced and there is no table name conflict,
734
+ # let's drop the existing table first
735
+ if table_exists and not is_conflict:
736
+ tbl_name = dt_obj._get_fully_qualified_table_name()
737
+ UtilFuncs._drop_table(tbl_name)
738
+ try:
739
+ table.create(bind=get_context())
740
+ except sqlachemyOperationalError as err:
741
+ raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
742
+ '\n' + str(err),
743
+ MessageCodes.TABLE_OBJECT_CREATION_FAILED)
744
+ else:
745
+ raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
746
+ MessageCodes.TABLE_OBJECT_CREATION_FAILED)
747
+
748
+ # Check column compatibility for insertion when table exists and if_exists = 'append'
749
+ if table_exists and if_exists.lower() == 'append':
750
+ UtilFuncs._get_warnings('set_table', set_table, 'if_exists', 'append')
751
+
752
+ table = UtilFuncs._get_sqlalchemy_table(table_name,
753
+ schema_name=schema_name)
754
+
755
+ if table is not None:
756
+ # ELE-2284
757
+ # We are not considering types for 'append' mode as it is a simple insert and no casting is applied
758
+ if is_pandas_df:
759
+ cols = _extract_column_info(df, index=index, index_label=index_label)
760
+ else:
761
+ cols, _ = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
762
+ if match_column_order:
763
+ cols_compatible = _check_columns_insertion_compatible(table.c, cols, is_pandas_df,
764
+ is_pti, timecode_column, sequence_column, derived_column)
765
+
766
+ if not cols_compatible:
767
+ raise TeradataMlException(Messages.get_message(MessageCodes.INSERTION_INCOMPATIBLE),
768
+ MessageCodes.INSERTION_INCOMPATIBLE)
769
+
770
+ # df is a Pandas DataFrame object
771
+ if isinstance(df, pd.DataFrame):
772
+ if not table_exists or if_exists.lower() == 'replace':
773
+ try:
774
+ # Support for saving Pandas index/Volatile is by manually inserting rows (batch) for now
775
+ if index or is_pti:
776
+ _insert_from_dataframe(df, con, schema_name, table_name, index,
777
+ chunksize, is_pti, timecode_column,
778
+ sequence_column, match_column_order)
779
+
780
+ # When index isn't saved & for non-PTI tables, to_sql insertion used (batch)
781
+ else:
782
+ # Empty queryband buffer before SQL call.
783
+ UtilFuncs._set_queryband()
784
+ df.to_sql(table_name, get_context(), if_exists='append', index=False, index_label=None,
785
+ chunksize=chunksize, schema=schema_name)
786
+
787
+ except sqlachemyOperationalError as err:
788
+ if "Duplicate row error" in str(err):
789
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_DUPICATE_ROW,
790
+ table_name),
791
+ MessageCodes.SET_TABLE_DUPICATE_ROW)
792
+ else:
793
+ raise
794
+
795
+ elif table_exists and if_exists.lower() == 'append':
796
+ _insert_from_dataframe(df, con, schema_name, table_name, index,
797
+ chunksize, is_pti, timecode_column,
798
+ sequence_column, match_column_order)
799
+
800
+ # df is a teradataml DataFrame object (to_sql wrapper used)
801
+ elif isinstance(df, tdmldf.DataFrame):
802
+ df_column_list = [col.name for col in df._metaexpr.c]
803
+
804
+ if is_pti:
805
+ # Reorder the column list to reposition the timecode and sequence columns
806
+ df_column_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
807
+
808
+ source_tbl_name = UtilFuncs._extract_table_name(df._table_name)
809
+ from_schema_name = UtilFuncs._extract_db_name(df._table_name)
810
+
811
+ df_utils._insert_all_from_table(table_name, source_tbl_name, df_column_list,
812
+ to_schema_name=schema_name,
813
+ from_schema_name=from_schema_name,
814
+ temporary=temporary)
815
+
816
+ # While table name conflict is present, Delete the source table after creation of temporary table.
817
+ # Rename the temporary table to destination table name.
818
+ if is_conflict and if_exists.lower() == 'replace':
819
+ tbl_name = dt_obj._get_fully_qualified_table_name()
820
+ UtilFuncs._drop_table(tbl_name)
821
+ _rename_table(table_name, dest_table_name)
822
+
823
+
824
+ except (TeradataMlException, ValueError, TypeError):
825
+ raise
826
+ except Exception as err:
827
+ raise TeradataMlException(Messages.get_message(MessageCodes.COPY_TO_SQL_FAIL) + str(err),
828
+ MessageCodes.COPY_TO_SQL_FAIL) from err
829
+
830
+
831
def _check_table_name_conflict(df, table_name):
    """
    Check whether the destination "table_name" matches the name of any table
    feeding the teradataml DataFrame's lineage. The DAG is walked from the
    DataFrame's node up to the root node, comparing source table names.

    PARAMETERS:
        df:
            Required Argument.
            Specifies the teradataml DataFrame object to be checked.
            Types: teradataml.dataframe.dataframe.DataFrame

        table_name:
            Required Argument.
            Specifies the name of the table to be created in Vantage.
            Types : String

    RETURNS:
        A boolean value representing the presence of conflict.

    RAISES:
        None

    EXAMPLES:
        >>> df = DataFrame("sales")
        >>> table_name = "destination_table"
        >>> _check_table_name_conflict(df, table_name)
    """
    aed = AedUtils()

    # Nothing to traverse when the node has no parents at all.
    if aed._aed_get_parent_node_count(df._nodeid) <= 0:
        return False

    # Walk upward one parent at a time until the root is reached.
    current = df._nodeid
    while current:
        parents = aed._aed_get_parent_nodeids(current)
        if not parents:
            # Reached a node with no parent - no conflict found.
            return False
        # Conflict when the destination name appears in the parent's source
        # table name; otherwise continue the climb from the parent.
        if table_name in aed._aed_get_source_tablename(parents[0]):
            return True
        current = parents[0]
    return False
881
+
882
+
883
def _get_sqlalchemy_table_from_tdmldf(df, meta):
    """
    This is an internal function used to generate an SQLAlchemy Table
    object for the underlying table/view of a DataFrame by reflecting it
    from the database.

    PARAMETERS:
        df:
            The teradataml DataFrame to generate the SQLAlchemy.Table object for.

        meta:
            The SQLAlchemy.Metadata object.

    RETURNS:
        SQLAlchemy.Table

    RAISES:
        None

    EXAMPLES:
        >>> df = DataFrame('admissions_train')
        >>> meta = sqlalchemy.MetaData()
        >>> table = _get_sqlalchemy_table_from_tdmldf(df, meta)

    """
    # NOTE: an unused "con = get_connection()" call was removed here; the
    # reflection below binds to the engine via get_context() instead.
    db_schema = UtilFuncs._extract_db_name(df._table_name)
    db_table_name = UtilFuncs._extract_table_name(df._table_name)

    # Reflect the table definition from the database into the Table object.
    return Table(db_table_name, meta, schema=db_schema, autoload_with=get_context())
913
+
914
+
915
+ def _get_index_labels(df, index_label):
916
+ """
917
+ Internal function to construct a list of labels for the indices to be saved from the Pandas DataFrames
918
+ based on user input and information from the DataFrame.
919
+
920
+ PARAMETERS:
921
+ df:
922
+ The Pandas input DataFrame.
923
+
924
+ index_label:
925
+ The user provided label(s) for the indices.
926
+
927
+ RAISES:
928
+ None
929
+
930
+ RETURNS:
931
+ A list of Strings corresponding the to labels for the indices to add as columns.
932
+
933
+ EXAMPLES:
934
+ _get_index_labels(df, index_label)
935
+ """
936
+ default_index_label = 'index_label'
937
+ default_level_prefix = 'level_'
938
+ level_cnt = 0
939
+
940
+ is_multi_index = isinstance(df.index, pd.MultiIndex)
941
+ ind_types = [level.dtype for level in df.index.levels] if is_multi_index else [df.index.dtype]
942
+
943
+ ind_names = []
944
+ if index_label:
945
+ ind_names = [index_label] if isinstance(index_label, str) else index_label
946
+ else:
947
+ for name in df.index.names:
948
+ if name not in ('', None):
949
+ ind_names.append(name)
950
+ else:
951
+ if is_multi_index:
952
+ ind_names.append(default_level_prefix + str(level_cnt))
953
+ level_cnt = level_cnt + 1
954
+ else:
955
+ df_columns = _get_pd_df_column_names(df)
956
+ label = default_level_prefix + str(level_cnt) if default_index_label in df_columns else default_index_label
957
+ ind_names.append(label)
958
+
959
+ return ind_names, ind_types
960
+
961
def _validate_partition_arguments(partition_by=None,
                                  partition_by_case=None,
                                  partition_by_range=None,
                                  sub_partition=None):
    """
    DESCRIPTION:
        Internal function to validate the partition_by arguments.

    PARAMETERS:
        partition_by:
            Optional argument.
            Specifies the columns on which PARTITION BY should be created.
            Types: str or ColumnExpression

        partition_by_case:
            Optional argument.
            Specifies different cases to partition the index.
            Types: str or ColumnExpression or tuple of ColumnExpression, str

        partition_by_range:
            Optional argument.
            Specifies the range of values of Date columns on which partition to be created.
            Types: str or ColumnExpression

        sub_partition:
            Optional argument.
            Specifies the details to subpartition the main partition according to the value provided.
            Types: int or Teradata Interval datatypes

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        >>> _validate_partition_arguments(partition_by='col1')
        >>> _validate_partition_arguments(partition_by_case=(df.col1 < 100, df.col1 < 1000))
    """
    # Validation matrix: arg name, value, is_optional, accepted types, check-empty.
    arg_info = [
        ['partition_by', partition_by, True, (str, ColumnExpression), True],
        ['partition_by_case', partition_by_case, True,
         (ColumnExpression, str, _TupleOf((str, ColumnExpression))), True],
        ['partition_by_range', partition_by_range, True, (ColumnExpression, str), True],
        ['sub_partition', sub_partition, True,
         (int, TeradataTypes.TD_RANGE_N_CLAUSE_TYPES.value), True],
    ]

    # Type-check every argument in one pass.
    _Validators._validate_function_arguments(arg_info)

    # Only one of the three partitioning styles may be supplied.
    _Validators._validate_mutually_exclusive_argument_groups(
        {"partition_by": partition_by},
        {"partition_by_case": partition_by_case},
        {"partition_by_range": partition_by_range})
1015
+
1016
+ def _build_partition_expression(partition_by=None,
1017
+ partition_by_case=None,
1018
+ partition_by_range=None,
1019
+ sub_partition=None):
1020
+ """
1021
+ DESCRIPTION:
1022
+ Internal function to build the partitioning expression for the table.
1023
+
1024
+ PARAMETERS:
1025
+ partition_by:
1026
+ Optional argument.
1027
+ Specifies the columns on which PARTITION BY should be created.
1028
+ Types: str or ColumnExpression
1029
+
1030
+ partition_by_case:
1031
+ Optional argument.
1032
+ Specifies different cases to partition the index.
1033
+ Types: str or ColumnExpression or tuple of ColumnExpression, str
1034
+
1035
+ partition_by_range:
1036
+ Optional argument.
1037
+ Specifies the range of values of Date columns on which partition to be created.
1038
+ Types: str or ColumnExpression
1039
+
1040
+ sub_partition:
1041
+ Optional argument.
1042
+ Specifies the details to subpartition the main partition according to the value provided.
1043
+ Types: int or Teradata Interval datatypes
1044
+
1045
+ RAISES:
1046
+ None
1047
+
1048
+ RETURNS:
1049
+ strings containing the partitioning expression and partition function.
1050
+
1051
+ EXAMPLES:
1052
+ >>> _build_partition_expression(partition_by='col1')
1053
+ >>> _build_partition_expression(partition_by_case=(df.col1 < 100, df.col1 < 1000))
1054
+
1055
+ """
1056
+ partition_exp = None
1057
+ partition_fn = None
1058
+ # Check if partition_by expression is a ColumnExpression,
1059
+ # if so, compile it to a string
1060
+ if partition_by:
1061
+ partition_exp = partition_by.compile() if isinstance(partition_by, ColumnExpression) \
1062
+ else partition_by
1063
+
1064
+ # Check if partition_by_case is a ColumnExpression or string,
1065
+ # if string, join to partition_by expression
1066
+ # if ColumnExpression, compile it to a string and join to partition_by expression
1067
+ # if tuple, compile each expression to a string and join to partition_by expression
1068
+ if partition_by_case:
1069
+ partition_fn = "CASE_N"
1070
+ partition_by_case = [partition_by_case] if isinstance(partition_by_case, (str, ColumnExpression)) \
1071
+ else partition_by_case
1072
+ partition_exp = "{}, NO CASE, UNKNOWN".format(
1073
+ ", ".join(str(exp.compile()) if isinstance(exp, ColumnExpression) else str(exp)
1074
+ for exp in partition_by_case))
1075
+
1076
+ # Check if partition_by_range is a ColumnExpression or string,
1077
+ # if so, compile it to a string
1078
+ if partition_by_range:
1079
+ partition_fn = "RANGE_N"
1080
+ sub_partition_clause = ""
1081
+ if isinstance(partition_by_range, ColumnExpression):
1082
+ partition_by_range = partition_by_range.compile()
1083
+
1084
+ # Check if sub_partition provided,
1085
+ # if so, complie the EACH clause for RANGE_N
1086
+ # If sub_partition is an int, the convert to string and add to the clause.
1087
+ # If sub_partition is a TeradataTypes.TD_RANGE_N_CLAUSE_TYPES,
1088
+ # convert to string and extract the precision and add to the clause.
1089
+ if sub_partition:
1090
+ sub_partition_clause = (
1091
+ f" EACH {str(sub_partition)}"
1092
+ if isinstance(sub_partition, int)
1093
+ else f" EACH INTERVAL '{sub_partition.precision}' {str(sub_partition).split(maxsplit=1)[1]}")
1094
+
1095
+ partition_exp = "{0}{1}".format(partition_by_range, sub_partition_clause)
1096
+ # Return partition_by expression and partition function
1097
+ return partition_exp, partition_fn
1098
+
1099
+
1100
def _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
                                  timezero_date, primary_time_index_name, columns_list,
                                  sequence_column, seq_max, types, index, index_label):
    """
    This is an internal function used to validate the PTI part of copy request.
    Dataframe, connection & related parameters are checked.
    Saving to Vantage is proceeded to only when validation returns True.

    PARAMETERS:
        df:
            The DataFrame (Pandas or teradataml) object to be saved.

        timecode_column:
            The column in the DataFrame that reflects the form of the timestamp
            data in the time series.
            Type: String

        timebucket_duration:
            A duration that serves to break up the time continuum in
            the time series data into discrete groups or buckets.
            Type: String

        timezero_date:
            Specifies the earliest time series data that the PTI table will accept.
            Type: String

        primary_time_index_name:
            A name for the Primary Time Index (PTI).
            Type: String

        columns_list:
            A list of one or more PTI table column names.
            Type: String or list of Strings

        sequence_column:
            Specifies a column of type Integer with sequences implying that the
            time series data readings are not unique.
            If not specified, the time series data are assumed to be unique in time.
            Type: String

        seq_max:
            Specifies the maximum number of sensor data rows that can have the
            same timestamp. Can be used when 'sequenced' is True.
            Accepted range: 1 - 2147483647.
            Type: int

        types:
            Dictionary specifying column-name to teradatasqlalchemy type-mapping.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column or not.
            Type: bool

        index_label:
            Column label for index column(s).
            Type: String

    RETURNS:
        True, when all parameters are valid.

    RAISES:
        TeradataMlException, when parameter validation fails.

    EXAMPLES:
        _validate_pti_copy_parameters(df = my_df, timecode_column = 'ts', timebucket_duration = 'HOURS(2)')
    """
    # Column names come from pandas directly, or from the teradataml metaexpression.
    if isinstance(df, pd.DataFrame):
        df_columns = _get_pd_df_column_names(df)
    else:
        df_columns = [column.name for column in df._metaexpr.c]

    wrapper_utils = AnalyticsWrapperUtils()

    # Validation matrix entries are: arg_name, arg, is_optional, acceptable types.
    # is_optional is False when the argument is required, or when it is
    # optional but must never be None.
    type_matrix = [
        ['timecode_column', timecode_column, False, (str)],
        ['columns_list', columns_list, True, (str, list)],
        ['timezero_date', timezero_date, True, (str)],
        ['timebucket_duration', timebucket_duration, True, (str)],
        ['primary_time_index_name', primary_time_index_name, True, (str)],
        ['sequence_column', sequence_column, True, (str)],
        ['seq_max', seq_max, True, (int)],
    ]
    wrapper_utils._validate_argument_types(type_matrix)

    # String/list arguments may not be empty when supplied.
    for value, arg_name in ((timecode_column, 'timecode_column'),
                            (columns_list, 'columns_list'),
                            (timezero_date, 'timezero_date'),
                            (timebucket_duration, 'timebucket_duration'),
                            (sequence_column, 'sequence_column')):
        wrapper_utils._validate_input_columns_not_empty(value, arg_name)

    # timecode_column must exist in the DataFrame and be of a valid timecode type.
    _validate_column_in_list_of_columns('df', df_columns, timecode_column, 'timecode_column')
    _validate_column_type(df, timecode_column, 'timecode_column',
                          PTITableConstants.VALID_TIMECODE_DATATYPES.value,
                          types, index, index_label)

    # timezero date
    _validate_timezero_date(timezero_date)

    # timebucket duration
    _Validators._validate_timebucket_duration(timebucket_duration)

    # Validate sequence_column: it must exist and be of a valid sequence type.
    if sequence_column is not None:
        _validate_column_in_list_of_columns('df', df_columns, sequence_column, 'sequence_column')
        _validate_column_type(df, sequence_column, 'sequence_column',
                              PTITableConstants.VALID_SEQUENCE_COL_DATATYPES.value,
                              types, index, index_label)

    # seq_max must fall within the documented range when supplied.
    if seq_max is not None and not 1 <= seq_max <= 2147483647:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
                seq_max, 'seq_max', '1 < integer < 2147483647'),
            MessageCodes.INVALID_ARG_VALUE)

    # Validate columns_list and normalize a single name into a list.
    _validate_columns_list('df', df_columns, columns_list)
    if isinstance(columns_list, str):
        columns_list = [columns_list]

    # Either one or both of timebucket_duration and columns_list must be specified.
    if timebucket_duration is None and not columns_list:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
                                 'timebucket_duration', 'columns_list'),
            MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
1232
+
1233
+
1234
+ def _validate_columns_list(df, df_columns, columns_list):
1235
+ """
1236
+ Internal function to validate columns list specified when creating a
1237
+ Primary Time Index (PTI) table.
1238
+
1239
+ PARAMETERS:
1240
+ df:
1241
+ Name of the DataFrame to which the column being validated
1242
+ does or should belong.
1243
+
1244
+ df_columns:
1245
+ List of columns in the DataFrame.
1246
+
1247
+ columns_list:
1248
+ The column or list of columns.
1249
+ Type: String or list of Strings
1250
+
1251
+ RETURNS:
1252
+ True if the column or list of columns is valid.
1253
+
1254
+ RAISES:
1255
+ Raise TeradataMlException on validation failure.
1256
+ """
1257
+ if columns_list is None:
1258
+ return True
1259
+
1260
+ # Validate DF has columns
1261
+ if isinstance(columns_list, str):
1262
+ columns_list = [columns_list]
1263
+
1264
+ for col in columns_list:
1265
+ _validate_column_in_list_of_columns(df, df_columns, col, 'columns_list')
1266
+
1267
+ return True
1268
+
1269
+
1270
+ def _validate_column_in_list_of_columns(df, df_columns, col, col_arg):
1271
+ """
1272
+ Internal function to validate the arguments used to specify
1273
+ a column name in DataFrame.
1274
+
1275
+ PARAMETERS:
1276
+ df:
1277
+ Name of the DataFrame to which the column being validated
1278
+ does or should belong.
1279
+
1280
+ df_column_list:
1281
+ List of columns in the DataFrame.
1282
+
1283
+ col:
1284
+ Column to be validated.
1285
+
1286
+ col_arg:
1287
+ Name of argument used to specify the column name.
1288
+
1289
+ RETURNS:
1290
+ True, if column name is a valid.
1291
+
1292
+ RAISES:
1293
+ TeradataMlException if invalid column name.
1294
+ """
1295
+ if col not in df_columns:
1296
+ raise TeradataMlException(
1297
+ Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(col,
1298
+ col_arg,
1299
+ df,
1300
+ 'DataFrame'),
1301
+ MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND)
1302
+
1303
+ return True
1304
+
1305
+
1306
def _validate_column_type(df, col, col_arg, expected_types, types = None, index = False, index_label = None):
    """
    Internal function to validate the type of an input DataFrame column against
    a list of expected teradatasqlalchemy types.

    PARAMETERS
        df:
            Input DataFrame (Pandas or teradataml) which has the column to be tested
            for type.

        col:
            The column in the input DataFrame to be tested for type.

        col_arg:
            The name of the argument used to pass the column name (used only to
            build the error message).

        expected_types:
            Specifies a list of teradatasqlalchemy datatypes that the column is
            expected to be of type.

        types:
            Dictionary specifying column-name to teradatasqlalchemy type-mapping.
            When it contains "col", the mapped type is validated instead of the
            DataFrame's own column type.

        index:
            Accepted for signature parity with sibling helpers; not referenced
            in this function's body.

        index_label:
            Accepted for signature parity with sibling helpers; not referenced
            in this function's body.

    RETURNS:
        True, when the column is of an expected type.

    RAISES:
        TeradataMlException, when the column is not one of the expected types.

    EXAMPLES:
        _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES, types)
    """
    # Case 1: the user supplied an explicit target type for this column in
    # "types" - validate that mapping (an instance) against the expected classes.
    if types is not None and col in types:
        if not any(isinstance(types[col], expected_type) for expected_type in expected_types):
            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
                                      format(col_arg, types[col], ' or '.join(expected_type.__visit_name__
                                                                              for expected_type in expected_types)),
                                      MessageCodes.INVALID_COLUMN_TYPE)
    # Case 2: pandas DataFrame - derive the teradatasqlalchemy type from the
    # pandas dtype and check it against the expected types.
    # NOTE(review): this branch uses membership ("t not in expected_types")
    # while the other branches use isinstance(); _get_sqlalchemy_mapping_types
    # may return either a class or an instance (e.g. VARCHAR(...)), so an
    # instance result would never compare equal here - confirm intended.
    elif isinstance(df, pd.DataFrame):
        t = _get_sqlalchemy_mapping_types(str(df.dtypes[col]))
        if t not in expected_types:
            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
                                      format(col_arg, t, ' or '.join(expected_type.__visit_name__
                                                                     for expected_type in expected_types)),
                                      MessageCodes.INVALID_COLUMN_TYPE)
    # Case 3: teradataml DataFrame - the column expression carries its own
    # database type instance; validate with isinstance.
    elif not any(isinstance(df[col].type, t) for t in expected_types):
        raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
                                  format(col_arg, df[col].type, ' or '.join(expected_type.__visit_name__
                                                                            for expected_type in expected_types)),
                                  MessageCodes.INVALID_COLUMN_TYPE)

    return True
1360
+
1361
+
1362
def _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table, types, index=None,
                         index_label=None, partition_expression=None, partition_function=None):
    """
    Internal helper that constructs a SQLAlchemy Table object for the given
    input, supporting Teradata-specific constructs such as VOLATILE tables,
    SET/MULTISET tables, primary indexes and partitioning.

    PARAMETERS:
        df:
            The teradataml DataFrame, Pandas DataFrame, or CSV file path whose
            columns define the table.

        table_name:
            Name of SQL table.

        con:
            A SQLAlchemy connectable (engine/connection) object.

        primary_index:
            Column name (str) or list of column names to use as the primary
            index; when None, the table is created with NO PRIMARY INDEX.

        temporary:
            When True, a VOLATILE table construct is produced.

        schema_name:
            Name of the SQL schema in the database to write to.

        set_table:
            When True a SET table is created, otherwise a MULTISET table.

        types:
            Optional dict mapping column names to teradatasqlalchemy types.

        index:
            Flag specifying whether to write the Pandas DataFrame index as
            column(s) or not.

        index_label:
            Column label(s) for index column(s).

        partition_expression:
            Partitioning expression for the PARTITION BY clause.

        partition_function:
            Partitioning function used with the PARTITION BY clause.

    RETURNS:
        SQLAlchemy Table

    RAISES:
        N/A

    EXAMPLES:
        _create_table_object(df = my_df, table_name = 'test_table', con = tdconnection, primary_index = None,
            temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
    """
    # Teradata-specific post-CREATE options; the opts dict can be extended
    # later with fallback, journalling, log, etc.
    table_opts = post(opts={})
    table_prefixes = []

    if temporary is True:
        # VOLATILE tables need ON COMMIT PRESERVE ROWS to retain data.
        table_opts = table_opts.on_commit(option='preserve')
        table_prefixes.append('VOLATILE')

    table_prefixes.append('set' if set_table else 'multiset')

    metadata = MetaData()
    metadata.bind = con

    # Derive column names/types from the input kind.
    if isinstance(df, pd.DataFrame):
        col_names, col_types = _extract_column_info(df, types, index, index_label)
    elif isinstance(df, str):
        # CSV path: column info comes entirely from the user-supplied types.
        col_names, col_types = _extract_column_info(df, types)
    else:
        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
        if types is not None:
            # User-specified types win; derived defaults fill in unlisted columns.
            col_types = [types.get(name, derived) for name, derived in zip(col_names, col_types)]

    # Primary index handling: None means an explicit NO PRIMARY INDEX table.
    if primary_index is None:
        table_opts = table_opts.no_primary_index()
    elif isinstance(primary_index, list):
        table_opts = table_opts.primary_index(unique=False, cols=primary_index)
    elif isinstance(primary_index, str):
        table_opts = table_opts.primary_index(unique=False, cols=[primary_index])

    # Optional PARTITION BY clause.
    if partition_expression:
        table_opts = table_opts.partition_by(partition_expression=partition_expression,
                                             partition_fn=partition_function)

    # Assemble the Table construct with all Teradata-specific options attached.
    return Table(table_name, metadata,
                 *(Column(name, col_type) for name, col_type in zip(col_names, col_types)),
                 teradatasql_post_create=table_opts,
                 prefixes=table_prefixes,
                 schema=schema_name)
1472
+
1473
+
1474
def _create_pti_table_object(df, con, table_name, schema_name, temporary, primary_time_index_name,
                             timecode_column, timezero_date, timebucket_duration,
                             sequence_column, seq_max, columns_list, set_table, types, index=None, index_label=None):
    """
    Internal helper that constructs a SQLAlchemy Table object for a Teradata
    Primary Time Index (PTI) table, optionally VOLATILE and SET/MULTISET.

    PARAMETERS:
        df:
            The teradataml or Pandas DataFrame object to be saved.

        con:
            A SQLAlchemy connectable (engine/connection) object.

        table_name:
            Name of SQL table.

        schema_name:
            Name of the SQL schema in the database to write to.

        temporary:
            When True, a VOLATILE table construct is produced.

        primary_time_index_name:
            A name for the Primary Time Index (PTI).

        timecode_column:
            The column in the DataFrame that reflects the form of the timestamp
            data in the time series.

        timezero_date:
            Specifies the earliest time series data that the PTI table will accept.

        timebucket_duration:
            A duration that breaks the time continuum of the time series data
            into discrete buckets.

        sequence_column:
            Column with sequences implying that time series readings are not
            unique; when None the readings are assumed unique.

        seq_max:
            Maximum number of sensor data rows that can share a timestamp;
            applies only when a sequence column is used.

        columns_list:
            A list of one or more PTI table column names.

        set_table:
            When True a SET table is created, otherwise a MULTISET table.

        types:
            Optional dict mapping column names to teradatasqlalchemy types.

        index:
            Flag specifying whether to write the Pandas DataFrame index as a
            column or not.

        index_label:
            Column label for index column(s).

    RETURNS:
        SQLAlchemy Table

    RAISES:
        N/A

    EXAMPLES:
        _create_pti_table_object(df = my_df, table_name = 'test_table', con = tdconnection,
            timecode_column = 'ts', columns_list = ['user_id', 'location'])
    """
    metadata = MetaData()

    # Determine column info and the datatype of the timecode column.
    if isinstance(df, pd.DataFrame):
        col_names, col_types = _extract_column_info(df, types, index, index_label)
        # The mapped type is a class here; instantiate it for the PTI clause.
        timecode_datatype = col_types[col_names.index(timecode_column)]()
    else:
        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
        if types is not None:
            # User-specified types win; derived defaults fill in unlisted columns.
            col_types = [types.get(name, derived) for name, derived in zip(col_names, col_types)]
        timecode_datatype = df[timecode_column].type

    def _drop_column(column):
        # The database generates TD_TIMECODE/TD_SEQNO automatically for PTI
        # tables, so these columns must not appear in the explicit column list.
        if column in col_names:
            position = col_names.index(column)
            del col_names[position]
            del col_types[position]

    _drop_column(timecode_column)
    if sequence_column is not None:
        _drop_column(sequence_column)

    # Teradata-specific post-CREATE options; extensible with fallback,
    # journalling, log, etc.
    table_opts = post(opts={})
    table_prefixes = []

    if temporary:
        # VOLATILE tables need ON COMMIT PRESERVE ROWS to retain data.
        table_opts = table_opts.on_commit(option='preserve')
        table_prefixes.append('VOLATILE')

    table_prefixes.append('set' if set_table else 'multiset')

    table_opts = table_opts.primary_time_index(timecode_datatype,
                                               name=primary_time_index_name,
                                               timezero_date=timezero_date,
                                               timebucket_duration=timebucket_duration,
                                               sequenced=sequence_column is not None,
                                               seq_max=seq_max,
                                               cols=columns_list)

    return Table(table_name, metadata,
                 *(Column(name, col_type) for name, col_type in zip(col_names, col_types)),
                 teradatasql_post_create=table_opts,
                 prefixes=table_prefixes,
                 schema=schema_name)
1606
+
1607
def _create_temporal_table(df, table_name, con, primary_index, schema_name,
                           valid_time_columns, derived_column, types, index=None, index_label=None):
    """
    This is an internal function used to construct and execute a CREATE TABLE
    statement for a Teradata temporal table. Supports creation of tables with a
    PERIOD FOR derived column using the specified valid time columns.

    PARAMETERS:
        df:
            Required Argument.
            The teradataml or Pandas DataFrame object to be saved.
            Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame

        table_name:
            Required Argument.
            Name of SQL table.
            Types: String

        con:
            Optional Argument.
            A SQLAlchemy connectable (engine/connection) object.
            Note: not referenced in this function's body; SQL is executed via
            execute_sql() on the current connection.
            Types: SQLAlchemy Engine or Connection

        primary_index:
            Optional Argument.
            Creates Teradata Table(s) with Primary index column if specified.
            Types: String or list of Strings

        schema_name:
            Optional Argument.
            Specifies the name of the SQL schema in the database to write to.
            Types: String

        valid_time_columns:
            Required Argument.
            Specifies a tuple of two column names representing the temporal validity period.
            Types: tuple of Strings or str

        derived_column:
            Optional Argument.
            Specifies the name of the derived PERIOD FOR column to be created.
            Types: String

        types:
            Optional Argument.
            Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
            Types: dict

        index:
            Optional Argument.
            Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
            Types: Boolean

        index_label:
            Optional Argument.
            Column label(s) for index column(s).
            Types: String or list of Strings

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        _create_temporal_table(
            df=my_df,
            table_name='temporal_table',
            con=td_connection,
            primary_index=['id'],
            schema_name='my_schema',
            valid_time_columns=('start_date', 'end_date'),
            derived_column='validity_period',
            types={'id': INTEGER, 'start_date': DATE, 'end_date': DATE},
            index=False,
            index_label=None
        )

    """

    # Extract column names and types
    if isinstance(df, pd.DataFrame):
        col_names, col_types = _extract_column_info(df, types, index, index_label)
    else:
        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
        if types is not None:
            # User-specified types win; derived defaults fill in unlisted columns.
            col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]

    columns_clause_ = []
    # Ensure all col_types are instances, not classes
    for i, col_type in enumerate(col_types):
        if isinstance(col_type, type):
            col_types[i] = col_type()
    # Use col_names and col_types to build the columns clause.
    # Compile column types to string using the Teradata dialect.
    # Add NOT NULL to valid_time_columns.
    # NOTE(review): when valid_time_columns is a str, the "in" test below is a
    # substring match, so any column whose name is a substring of the string
    # gets NOT NULL - confirm whether str input should be normalized first.
    for col_name, col_type in zip(col_names, col_types):
        col_def = '{} {}'.format(col_name, col_type.compile(dialect=td_dialect()))

        if col_name in valid_time_columns:
            col_def += ' NOT NULL'
            # A PERIOD column carrying the valid time itself becomes the
            # VALIDTIME dimension directly.
            if isinstance(col_type, (PERIOD_DATE, PERIOD_TIMESTAMP)):
                col_def += ' AS VALIDTIME'
        columns_clause_.append(col_def)

    # For a (begin, end) tuple, derive a PERIOD FOR ... AS VALIDTIME column;
    # default its name to "<begin>_<end>" when not supplied.
    period_for_clause = []
    if isinstance(valid_time_columns, tuple):
        if derived_column is None:
            derived_column = "_".join(valid_time_columns)
        period_for_clause = ['PERIOD FOR {} ({}, {}) AS VALIDTIME'.format(
            derived_column, valid_time_columns[0], valid_time_columns[1])
        ]
    columns_clause = ",\n    ".join(columns_clause_ + period_for_clause)

    # Prepare primary index clause.
    if primary_index:
        primary_index_clause = "PRIMARY INDEX ({})".format(
            ", ".join(UtilFuncs._as_list(primary_index)))
    else:
        primary_index_clause = ""

    # Prepare create table statement.
    table_name = UtilFuncs._get_qualified_table_name(schema_name, table_name) if\
        schema_name else table_name
    sql = """
    CREATE MULTISET TABLE {}
    (\n{}\n)\n{}
    """.format(table_name, columns_clause, primary_index_clause)
    try:
        execute_sql(sql)
    except Exception as err:
        # Surface the underlying database error inside the teradataml exception.
        raise TeradataMlException(
            Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
            '\n' + str(err),
            MessageCodes.TABLE_OBJECT_CREATION_FAILED
        )
1742
+
1743
+
1744
+ def _rename_column(col_names, search_for, rename_to):
1745
+ """
1746
+ Internal function to rename a column in a list of columns of a Pandas DataFrame.
1747
+
1748
+ PARAMETERS:
1749
+ col_names:
1750
+ Required Argument.
1751
+ The list of column names of the Pandas DataFrame.
1752
+
1753
+ search_for:
1754
+ Required Argument.
1755
+ The column name that need to be changed/renamed.
1756
+
1757
+ rename_to:
1758
+ Required Argument.
1759
+ The column name that the 'search_for' column needs to be replaced with.
1760
+
1761
+ RETURNS:
1762
+ A list of renamed columns list.
1763
+
1764
+ EXAMPLES:
1765
+ cols = _rename_column(cols, 'col_1', 'new_col_1')
1766
+ """
1767
+ ind = col_names.index(search_for)
1768
+ col_names.pop(ind)
1769
+ col_names.insert(ind, rename_to)
1770
+
1771
+ return col_names
1772
+
1773
+
1774
def _rename_to_pti_columns(col_names, timecode_column, sequence_column,
                           timecode_column_index=None, sequence_column_index=None):
    """
    Internal function to rename columns of a Pandas DataFrame to match the PTI
    table column names in Vantage, or to revert such a renaming.

    PARAMETERS:
        col_names:
            The list of column names of the Pandas DataFrame.

        timecode_column:
            The column name that reflects the timecode column in the PTI table.

        sequence_column:
            The column name that reflects the sequence column in the PTI table.

        timecode_column_index:
            The index of the timecode column. When specified, it indicates that
            a reverse renaming operation is to be performed.

        sequence_column_index:
            The index of the sequence column. When specified, it indicates that
            a reverse renaming operation is to be performed.

    RETURNS:
        A list of renamed PTI related columns.

    EXAMPLES:
        cols = _rename_to_pti_columns(cols, timecode_column, sequence_column, t_index=None, s_index)
        cols = _rename_to_pti_columns(cols, timecode_column, sequence_column)
    """
    # Forward direction (no index given): user name -> database-generated name.
    # Reverse direction (index given): database-generated name -> user name.
    if timecode_column_index is None:
        col_names = _rename_column(col_names, timecode_column, PTITableConstants.TD_TIMECODE.value)
    else:
        col_names = _rename_column(col_names, PTITableConstants.TD_TIMECODE.value, timecode_column)

    # The sequence column is optional; rename only when present.
    if sequence_column is not None:
        if sequence_column_index is None:
            col_names = _rename_column(col_names, sequence_column, PTITableConstants.TD_SEQNO.value)
        else:
            col_names = _rename_column(col_names, PTITableConstants.TD_SEQNO.value, sequence_column)

    return col_names
1819
+
1820
+
1821
+ def _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column, df_col_type_list = None):
1822
+ """
1823
+ Internal function to reorder the list of columns used to construct the 'INSERT INTO'
1824
+ statement as required when the target table is a PTI table.
1825
+
1826
+ PARAMETERS:
1827
+ df_column_list:
1828
+ A list of column names for the columns in the DataFrame.
1829
+
1830
+ timecode_column:
1831
+ The timecode_columns which should be moved to the first position.
1832
+
1833
+ sequence_column:
1834
+ The timecode_columns which should be moved to the first position.
1835
+
1836
+ df_col_type_list:
1837
+ Optionally reorder the list containing the types of the columns to match the
1838
+ reordering the of df_column_list.
1839
+
1840
+ RETURNS:
1841
+ A reordered list of columns names for the columns in the DataFrame.
1842
+ If the optional types list is also specified, then a tuple of the list reordered columns names
1843
+ and the list of the column types.
1844
+
1845
+ EXAMPLE:
1846
+ new_colname_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
1847
+ new_colname_list, new_type_list = _reorder_insert_list_for_pti(df_column_list, timecode_column,
1848
+ sequence_column, df_col_type_list)
1849
+ """
1850
+ # Reposition timecode (to the first) and sequence column (to the second)
1851
+ # in df_column_list
1852
+ timecode_column_index = df_column_list.index(timecode_column)
1853
+ df_column_list.insert(0, df_column_list.pop(timecode_column_index))
1854
+ if df_col_type_list is not None:
1855
+ df_col_type_list.insert(0, df_col_type_list.pop(timecode_column_index))
1856
+
1857
+ if sequence_column is not None:
1858
+ sequence_column_index = df_column_list.index(sequence_column)
1859
+ df_column_list.insert(1, df_column_list.pop(sequence_column_index))
1860
+ if df_col_type_list is not None:
1861
+ df_col_type_list.insert(0, df_col_type_list.pop(sequence_column_index))
1862
+
1863
+ if df_col_type_list is not None:
1864
+ return df_column_list, df_col_type_list
1865
+ else:
1866
+ return df_column_list
1867
+
1868
+
1869
def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_pandas_df=False,
                                        is_pti=False, timecode_column=None, sequence_column=None, derived_column=None):
    """
    Internal function used to determine insertion compatibility between a
    target table and a source DataFrame by comparing column counts and
    positional column names.

    PARAMETERS:
        table1_col_object:
            Specifies a list/collection of SQLAlchemy ColumnExpression Objects for the target table.

        table2_cols:
            Specifies a list of column names for the source (teradataml DataFrame).

        is_pandas_df:
            Flag specifying whether the source is a pandas DataFrame.
            Default: False
            Note: When this flag is True, table2_cols is passed as a tuple object of
                  ([column_names], [column_types])

        is_pti:
            Boolean flag indicating if the target table is a PTI table.

        timecode_column:
            timecode_column required to order the select expression for the insert.
            It should be the first column in the select expression.

        sequence_column:
            sequence_column required to order the select expression for the insert.
            It should be the second column in the select expression.

        derived_column:
            Specifies a derived column that is part of the table schema but not
            part of insert.
            Types: String

    RETURNS:
        a) True, when insertion compatible (number of columns and their names match)
        b) False, otherwise

    RAISES:
        N/A

    EXAMPLES:
        _check_columns_insertion_compatible(table1.c, ['co1', 'col2'], False)
        _check_columns_insertion_compatible(table1.c, (['co1', 'col2'], [int, str]), True, True, 'ts', 'seq')
        _check_columns_insertion_compatible(table1.c, (['co1', 'col2'], [int, str]), True, True, 'ts', 'seq', 'derived_col')

    """
    target_cols, _ = UtilFuncs._extract_table_object_column_info(table1_col_object)
    source_cols = table2_cols[0] if is_pandas_df else table2_cols

    # A derived PERIOD FOR column exists only in the table schema and is never
    # part of the insert list, so exclude it from the comparison.
    if derived_column is not None and derived_column in target_cols:
        target_cols.remove(derived_column)

    # Column counts must agree before any name comparison.
    if len(target_cols) != len(source_cols):
        return False

    if is_pti is True:
        # PTI tables keep the timecode (first) and sequence (second) columns
        # under database-generated names; mirror that layout on the source side.
        source_cols = _reorder_insert_list_for_pti(source_cols, timecode_column, sequence_column)
        source_cols = _rename_to_pti_columns(source_cols, timecode_column, sequence_column)

    # Positional, name-by-name comparison.
    return all(target == source for target, source in zip(target_cols, source_cols))
1942
+
1943
+
1944
def _extract_column_info(df, types = None, index = False, index_label = None):
    """
    This is an internal function used to extract column information for a DF,
    and map to user-specified teradatasqlalchemy types, if specified,
    for Table creation.

    PARAMETERS:
        df:
            The Pandas DataFrame object to be saved, or a CSV file path string
            (in which case column info comes entirely from "types").

        types:
            A python dictionary with column names and required types as key-value pairs.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column(s) or not.

        index_label:
            Column label(s) for index column(s).

    RETURNS:
        A tuple with the following elements:
        a) List of DataFrame Column names (index labels appended when index=True)
        b) List of equivalent teradatasqlalchemy column types

    RAISES:
        None

    EXAMPLES:
        _extract_column_info(df = my_df)
        _extract_column_info(df = my_df, types = {'id_col': INTEGER})

    """
    # CSV input: no DataFrame to introspect, rely on the user-supplied mapping.
    if isinstance(df, str):
        return list(types.keys()), list(types.values())

    col_names = _get_pd_df_column_names(df)

    # Precedence per column: explicit "types" entry, else TIMESTAMP(timezone=True)
    # for tz-aware datetime64[ns] columns, else the default pandas-dtype mapping.
    col_types = [types.get(col_name) if types and col_name in types else
                 TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes.iloc[key])
                 and (df[col_name].dt.tz is not None)
                 else _get_sqlalchemy_mapping_types(str(df.dtypes.iloc[key]))
                 for key, col_name in enumerate(list(df.columns))]

    ind_names = []
    ind_types = []
    if index:
        ind_names, ind_types = _get_index_labels(df, index_label)
        # NOTE(review): "key" enumerates index labels here, yet df.dtypes.iloc[key]
        # reads the dtype of the key-th DATA column, and df[ind_name] requires a
        # data column with the index label's name. This looks copied from the
        # column loop above - confirm whether the tz check should inspect
        # ind_types[key] / the index level itself instead.
        ind_types = [types.get(ind_name) if types and ind_name in types
                     else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes.iloc[key])
                     and (df[ind_name].dt.tz is not None)
                     else _get_sqlalchemy_mapping_types(str(ind_types[key]))
                     for key, ind_name in enumerate(ind_names)]

    return col_names + ind_names, col_types + ind_types
2000
+
2001
+
2002
def _insert_from_dataframe(df, con, schema_name, table_name, index, chunksize,
                           is_pti=False, timecode_column=None, sequence_column=None,
                           match_column_order=True):
    """
    This is an internal function used to sequentially extract column info from DF,
    iterate rows, and insert rows manually.
    Used for Insertions to Temporary Tables & Tables with Pandas index.

    This uses DBAPI's executeMany() which is a batch insertion method.

    PARAMETERS:
        df:
            The Pandas DataFrame object to be saved.

        con:
            A SQLAlchemy connectable (engine/connection) object.
            Note: not referenced in this function's body; SQL is executed via
            execute_sql() on the current connection.

        schema_name:
            Name of the schema.

        table_name:
            Name of the table.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column or not.

        chunksize:
            Specifies the number of rows to be loaded in a batch.
            Note:
                This argument is used only when argument "df" is pandas DataFrame.

        is_pti:
            Boolean flag indicating if the table is a PTI table.

        timecode_column:
            timecode_column required to order the select expression for the insert.
            It should be the first column in the select expression.

        sequence_column:
            sequence_column required to order the select expression for the insert.
            It should be the second column in the select expression.

        match_column_order:
            Specifies whether the column order of the df to be loaded matches
            the order of the existing table; when False, column names are
            listed explicitly in the INSERT statement.

    RETURNS:
        N/A

    RAISES:
        Whatever execute_sql raises is propagated unchanged.

    EXAMPLES:
        _insert_from_dataframe(df = my_df, con = tdconnection, schema = None, table_name = 'test_table',
                               index = True, index_label = None)
    """
    col_names = _get_pd_df_column_names(df)

    # Quoted, schema-qualified table name.
    # NOTE(review): in the schema-qualified form only the schema is quoted,
    # not the table name - confirm table names needing quoting reach here.
    table = '"{}"'.format(table_name)
    if schema_name is not None:
        table = '"{}".{}'.format(schema_name, table_name)

    try:

        if is_pti:
            # PTI targets expect timecode first and sequence second; reorder
            # the non-index columns accordingly.
            col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)

        is_multi_index = isinstance(df.index, pd.MultiIndex)

        insert_list = []

        # Build a parameterized INSERT; one '?' per data column, plus one per
        # index level when the index is also written.
        if not match_column_order:
            # Explicit column list so values land in the right columns even if
            # the table's column order differs.
            ins = "INSERT INTO {} {} VALUES {};".format(
                table,
                '(' + ', '.join(col_names) + ')',
                '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
                                                    if index is True else len(col_names))]) + ')')
        else:
            ins = "INSERT INTO {} VALUES {};".format(
                table,
                '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
                                                    if index is True else len(col_names))]) + ')')

        # Empty queryband buffer before SQL call.
        UtilFuncs._set_queryband()
        rowcount = 0
        # Iterate rows of DataFrame over new re-ordered columns.
        # itertuples(index=True) yields (index_value, col0, col1, ...), so
        # data columns start at tuple position 1.
        for row_index, row in enumerate(df[col_names].itertuples(index=True)):
            ins_dict = ()
            for col_index, x in enumerate(col_names):
                ins_dict = ins_dict + (row[col_index+1],)

            if index is True:
                # A MultiIndex value is already a tuple and is concatenated
                # directly; a plain index value is wrapped in a 1-tuple.
                ins_dict = ins_dict + row[0] if is_multi_index else ins_dict + (row[0],)

            insert_list.append(ins_dict)
            rowcount = rowcount + 1

            # dbapi_batchsize corresponds to the max batch size for the DBAPI driver.
            # Insert the rows once the batch-size reaches the max allowed.
            if rowcount == chunksize:
                # Batch Insertion (using DBAPI's executeMany) used here to insert list of tuples.
                cur = execute_sql(ins, insert_list)
                if cur is not None:
                    cur.close()
                rowcount = 0
                insert_list.clear()

        # Insert any remaining rows (final partial batch).
        if rowcount > 0:
            cur = execute_sql(ins, insert_list)
            if cur is not None:
                cur.close()

    except Exception:
        # Propagate unchanged; callers translate into TeradataMlException.
        raise
2120
+
2121
+
2122
+ def _get_pd_df_column_names(df):
2123
+ """
2124
+ Internal function to return the names of columns in a Pandas DataFrame.
2125
+
2126
+ PARAMETERS
2127
+ df:
2128
+ The Pandas DataFrame to fetch the column names for.
2129
+
2130
+ RETURNS:
2131
+ A list of Strings
2132
+
2133
+ RAISES:
2134
+ None
2135
+
2136
+ EXAMPLES:
2137
+ _get_pd_df_column_names(df = my_df)
2138
+ """
2139
+ return df.columns.tolist()
2140
+
2141
+
2142
def _get_sqlalchemy_mapping(key):
    """
    Internal function that returns the SQLAlchemy type OBJECT mapped to a
    given Pandas dtype string, used for Table object creation.

    Unknown keys fall back to a UNICODE VARCHAR of the configured default size.

    PARAMETERS:
        key : String representing Pandas type ('int64', 'object' etc.)

    RETURNS:
        SQLAlchemy Type Object(Integer, String, Float, DateTime etc.)

    RAISES:
        N/A

    EXAMPLES:
        _get_sqlalchemy_mapping(key = 'int64')
    """
    mapping = _get_all_sqlalchemy_mappings()

    if key in mapping:
        return mapping[key]
    # Unmapped pandas dtypes default to a UNICODE VARCHAR.
    return VARCHAR(configure.default_varchar_size, charset='UNICODE')
2168
+
2169
+
2170
def _get_all_sqlalchemy_mappings():
    """
    Internal function returning the full pandas-dtype -> SQLAlchemy type
    OBJECT mapping (instances, not classes).

    PARAMETERS:

    RETURNS:
        dictionary { pandas_type : SQLAlchemy Type Object}

    RAISES:
        N/A

    EXAMPLES:
        _get_all_sqlalchemy_mappings()
    """
    def _unicode_varchar():
        # Fresh VARCHAR instance per key, mirroring the original literal mapping.
        return VARCHAR(configure.default_varchar_size, charset='UNICODE')

    return {
        'int32': INTEGER(),
        'int64': BIGINT(),
        'Int64': INTEGER(),
        'object': _unicode_varchar(),
        'O': _unicode_varchar(),
        'float64': FLOAT(),
        'float32': FLOAT(),
        'bool': BYTEINT(),
        'datetime64': TIMESTAMP(),
        'datetime64[ns]': TIMESTAMP(),
        'datetime64[ns, UTC]': TIMESTAMP(timezone=True),
        'timedelta64[ns]': _unicode_varchar(),
        'timedelta[ns]': _unicode_varchar(),
    }
2196
+
2197
+
2198
def _get_sqlalchemy_mapping_types(key):
    """
    Internal function that returns the SQLAlchemy TYPE (class for most keys)
    mapped to a given Pandas dtype string, used for Table object creation.

    Unknown keys fall back to a UNICODE VARCHAR of the configured default size.

    PARAMETERS:
        key : String representing Pandas type ('int64', 'object' etc.)

    RETURNS:
        SQLAlchemy Type (Integer, String, Float, DateTime etc.)

    RAISES:
        N/A

    EXAMPLES:
        _get_sqlalchemy_mapping_types(key = 'int64')
    """
    mapping = _get_all_sqlalchemy_types_mapping()

    if key in mapping:
        return mapping[key]
    # Unmapped pandas dtypes default to a UNICODE VARCHAR instance.
    return VARCHAR(configure.default_varchar_size, charset='UNICODE')
2224
+
2225
+
2226
def _get_all_sqlalchemy_types_mapping():
    """
    This is an internal function used to return a dictionary of all SQLAlchemy Type Mappings.
    It contains mappings from pandas data type to SQLAlchemyTypes.

    PARAMETERS:

    RETURNS:
        dictionary { pandas_type : SQLAlchemy Type}

    RAISES:
        N/A

    EXAMPLES:
        _get_all_sqlalchemy_types_mapping()
    """
    # Numeric/temporal dtypes map to bare type CLASSES, while string-like
    # entries and the tz-aware timestamp map to constructed INSTANCES --
    # presumably callers handle both forms; the mix is preserved as-is.
    teradata_types_map = {
        'int32': INTEGER,
        'int64': BIGINT,
        'float64': FLOAT,
        'float32': FLOAT,
        'bool': BYTEINT,
        'datetime64': TIMESTAMP,
        'datetime64[ns]': TIMESTAMP,
        'datetime64[ns, UTC]': TIMESTAMP(timezone=True),
    }
    # String-like and timedelta dtypes all fall back to a UNICODE VARCHAR,
    # each entry holding its own instance.
    for string_like in ('object', 'O', 'timedelta64[ns]', 'timedelta[ns]'):
        teradata_types_map[string_like] = VARCHAR(configure.default_varchar_size,
                                                  charset='UNICODE')

    return teradata_types_map
2254
def _validate_timezero_date(timezero_date):
    """
    Internal function to validate timezero_date specified when creating a
    Primary Time Index (PTI) table.

    PARAMETERS:
        timezero_date:
            The timezero_date passed to primary_time_index().
            Expected to be a str of the form "DATE 'YYYY-MM-DD'", or None.

    RETURNS:
        True if the value is valid.

    RAISES:
        TeradataMlException when the value is invalid.

    EXAMPLE:
        _validate_timezero_date("DATE '2011-01-01'")
        _validate_timezero_date('2011-01-01') # Invalid
    """
    # Return True if it is not specified or is None since it is optional.
    if timezero_date is None:
        return True

    pattern = re.compile(PTITableConstants.PATTERN_TIMEZERO_DATE.value)
    match = pattern.match(timezero_date)

    err_msg = Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(timezero_date,
                                                                          'timezero_date',
                                                                          "str of format DATE 'YYYY-MM-DD'")

    # The value must match the "DATE 'YYYY-MM-DD'" pattern. Reject a
    # non-match explicitly instead of relying on match.group(1) raising
    # AttributeError when match is None (same exception either way).
    if match is None:
        raise TeradataMlException(err_msg,
                                  MessageCodes.INVALID_ARG_VALUE)

    try:
        # The captured date portion must also be a real calendar date.
        datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
    except ValueError:
        raise TeradataMlException(err_msg,
                                  MessageCodes.INVALID_ARG_VALUE)

    # Looks like the value is valid
    return True
2293
def _validate_valid_time_columns(df, valid_time_columns, derived_column=None, types=None):
    """
    Internal function to validate that the columns specified in valid_time_columns
    exist in the DataFrame, are of type DATE or TIMESTAMP, and are of the same type.
    Also checks that the derived_column, if specified, is not present in the DataFrame.

    PARAMETERS:
        df:
            Required Argument.
            Specifies the Pandas or teradataml DataFrame object to be validated.
            Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame

        valid_time_columns:
            Required Argument.
            Specifies a tuple of two column names representing the temporal
            validity period, or a single PERIOD column name.
            Types: tuple of Strings OR String

        derived_column:
            Optional Argument.
            Specifies the name of the derived column that should not be
            present in the DataFrame.
            Types: String

        types:
            Optional Argument.
            Specifies a python dictionary with column-name(key) to column-type(value)
            mapping to create DataFrames.
            Types: dict

    RETURNS:
        None

    RAISES:
        TeradataMlException, ValueError

    EXAMPLES:
        _validate_valid_time_columns(
            df=my_df,
            valid_time_columns=('start_date', 'end_date'),
            derived_column='validity_period',
            types={'start_date': DATE, 'end_date': DATE}
        )
    """
    def _is_one_of(col_type, expected_types):
        # The dtype may be either an instance (e.g. TIMESTAMP()) or the
        # class itself (e.g. TIMESTAMP, when supplied via 'types'); accept both.
        return (isinstance(col_type, expected_types) or
                any(col_type is expected for expected in expected_types))

    def _type_name(col_type):
        # Readable type name for error messages, for classes and instances alike.
        return col_type.__name__ if isinstance(col_type, type) \
            else col_type.__class__.__name__

    def _raise_invalid_type(col_type, expected_desc):
        # Common INVALID_COLUMN_TYPE error used by all dtype checks below.
        raise TeradataMlException(
            Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).format(
                'valid_time_columns', _type_name(col_type), expected_desc
            ),
            MessageCodes.INVALID_COLUMN_TYPE
        )

    df_columns = _get_pd_df_column_names(df) if isinstance(df, pd.DataFrame) else df.columns
    df_dtypes = (
        {
            col: _get_sqlalchemy_mapping_types(str(df.dtypes[col]))
            for col in df.dtypes.keys()
        }
        if isinstance(df, pd.DataFrame)
        else df._td_column_names_and_sqlalchemy_types
    )
    # If types argument is provided, override the dtypes for those columns.
    if types is not None:
        for col, typ in types.items():
            if col in df_columns:
                df_dtypes[col] = typ

    if derived_column is not None and derived_column in df_columns:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND).format(
                derived_column, 'derived_column', 'dataframe.', 'Provide value which is not part of DataFrame columns'
            ),
            MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND
        )
    # valid_time_columns can be a tuple of two column names or a single column name.
    if isinstance(valid_time_columns, tuple):
        if len(valid_time_columns) != 2:
            raise TeradataMlException(
                Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
                    valid_time_columns, 'valid_time_columns', 'tuple of two column names'
                ),
                MessageCodes.INVALID_ARG_VALUE
            )
        # Check if both columns are present in the DataFrame.
        for col in valid_time_columns:
            if col not in df_columns:
                raise TeradataMlException(
                    Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(
                        col, 'valid_time_columns', 'df', 'DataFrame'
                    ),
                    MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND
                )

        col1_type = df_dtypes[valid_time_columns[0]]
        col2_type = df_dtypes[valid_time_columns[1]]

        # Both columns must be DATE or TIMESTAMP (object or class).
        if not _is_one_of(col1_type, (TIMESTAMP, DATE)):
            _raise_invalid_type(col1_type, 'DATE or TIMESTAMP')
        if not _is_one_of(col2_type, (TIMESTAMP, DATE)):
            _raise_invalid_type(col2_type, 'DATE or TIMESTAMP')

        # Normalize both dtypes to their classes before comparing. The
        # previous 'type(col1_type) != type(col2_type)' comparison could
        # not detect a DATE-vs-TIMESTAMP mismatch when both were supplied
        # as classes (type() of both classes is the same metaclass), and
        # wrongly rejected a class/instance pair of the same type.
        cls1 = col1_type if isinstance(col1_type, type) else type(col1_type)
        cls2 = col2_type if isinstance(col2_type, type) else type(col2_type)
        if cls1 is not cls2:
            # NOTE(review): a ValueError (not TeradataMlException) is raised
            # here; retained as-is for backward compatibility with callers.
            raise ValueError(
                Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
                    valid_time_columns, 'valid_time_columns', 'both columns of same type (DATE or TIMESTAMP)'
                ),
                MessageCodes.INVALID_ARG_VALUE
            )
    elif isinstance(valid_time_columns, str):
        col = valid_time_columns

        # Check membership BEFORE the dtype lookup; looking up the dtype
        # first raised a bare KeyError for a missing column instead of the
        # intended TeradataMlException.
        if col not in df_columns:
            raise TeradataMlException(
                Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(
                    col, 'valid_time_columns', 'df', 'DataFrame'
                ),
                MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND
            )

        col_type = df_dtypes[col]
        # A single column must be PERIOD_DATE or PERIOD_TIMESTAMP (object or class).
        if not _is_one_of(col_type, (PERIOD_TIMESTAMP, PERIOD_DATE)):
            _raise_invalid_type(col_type, 'PERIOD_DATE or PERIOD_TIMESTAMP')
    else:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
                valid_time_columns, 'valid_time_columns', 'tuple of two column names or a single column name'
            ),
            MessageCodes.INVALID_ARG_VALUE
        )