teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff compares the contents of publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Note: this release of teradataml has been flagged as potentially problematic.

Files changed (1285)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1864 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2013 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +804 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1628 -0 (new AutoML package; see the sketch after this list)
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +993 -0
  31. teradataml/automl/data_transformation.py +727 -0
  32. teradataml/automl/feature_engineering.py +1648 -0
  33. teradataml/automl/feature_exploration.py +547 -0
  34. teradataml/automl/model_evaluation.py +163 -0
  35. teradataml/automl/model_training.py +887 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/pkce_client.py +481 -481
  41. teradataml/common/aed_utils.py +6 -2
  42. teradataml/common/bulk_exposed_utils.py +111 -111
  43. teradataml/common/constants.py +1433 -1441
  44. teradataml/common/deprecations.py +160 -0
  45. teradataml/common/exceptions.py +73 -73
  46. teradataml/common/formula.py +742 -742
  47. teradataml/common/garbagecollector.py +592 -635
  48. teradataml/common/messagecodes.py +422 -431
  49. teradataml/common/messages.py +227 -231
  50. teradataml/common/sqlbundle.py +693 -693
  51. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  52. teradataml/common/utils.py +2418 -2500
  53. teradataml/common/warnings.py +25 -25
  54. teradataml/common/wrapper_utils.py +1 -110
  55. teradataml/config/dummy_file1.cfg +4 -4
  56. teradataml/config/dummy_file2.cfg +2 -2
  57. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  58. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  59. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  60. teradataml/context/aed_context.py +217 -217
  61. teradataml/context/context.py +1071 -999
  62. teradataml/data/A_loan.csv +19 -19
  63. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  64. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  65. teradataml/data/B_loan.csv +49 -49
  66. teradataml/data/BuoyData2.csv +17 -17
  67. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  68. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  69. teradataml/data/Convolve2RealsLeft.csv +5 -5
  70. teradataml/data/Convolve2RealsRight.csv +5 -5
  71. teradataml/data/Convolve2ValidLeft.csv +11 -11
  72. teradataml/data/Convolve2ValidRight.csv +11 -11
  73. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  74. teradataml/data/Orders1_12mf.csv +24 -24
  75. teradataml/data/Pi_loan.csv +7 -7
  76. teradataml/data/SMOOTHED_DATA.csv +7 -7
  77. teradataml/data/TestDFFT8.csv +9 -9
  78. teradataml/data/TestRiver.csv +109 -109
  79. teradataml/data/Traindata.csv +28 -28
  80. teradataml/data/acf.csv +17 -17
  81. teradataml/data/adaboost_example.json +34 -34
  82. teradataml/data/adaboostpredict_example.json +24 -24
  83. teradataml/data/additional_table.csv +10 -10
  84. teradataml/data/admissions_test.csv +21 -21
  85. teradataml/data/admissions_train.csv +41 -41
  86. teradataml/data/admissions_train_nulls.csv +41 -41
  87. teradataml/data/ageandheight.csv +13 -13
  88. teradataml/data/ageandpressure.csv +31 -31
  89. teradataml/data/antiselect_example.json +36 -36
  90. teradataml/data/antiselect_input.csv +8 -8
  91. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  92. teradataml/data/applicant_external.csv +6 -6
  93. teradataml/data/applicant_reference.csv +6 -6
  94. teradataml/data/arima_example.json +9 -9
  95. teradataml/data/assortedtext_input.csv +8 -8
  96. teradataml/data/attribution_example.json +33 -33
  97. teradataml/data/attribution_sample_table.csv +27 -27
  98. teradataml/data/attribution_sample_table1.csv +6 -6
  99. teradataml/data/attribution_sample_table2.csv +11 -11
  100. teradataml/data/bank_churn.csv +10001 -0
  101. teradataml/data/bank_web_clicks1.csv +42 -42
  102. teradataml/data/bank_web_clicks2.csv +91 -91
  103. teradataml/data/bank_web_url.csv +85 -85
  104. teradataml/data/barrier.csv +2 -2
  105. teradataml/data/barrier_new.csv +3 -3
  106. teradataml/data/betweenness_example.json +13 -13
  107. teradataml/data/bin_breaks.csv +8 -8
  108. teradataml/data/bin_fit_ip.csv +3 -3
  109. teradataml/data/binary_complex_left.csv +11 -11
  110. teradataml/data/binary_complex_right.csv +11 -11
  111. teradataml/data/binary_matrix_complex_left.csv +21 -21
  112. teradataml/data/binary_matrix_complex_right.csv +21 -21
  113. teradataml/data/binary_matrix_real_left.csv +21 -21
  114. teradataml/data/binary_matrix_real_right.csv +21 -21
  115. teradataml/data/blood2ageandweight.csv +26 -26
  116. teradataml/data/bmi.csv +501 -0
  117. teradataml/data/boston.csv +507 -507
  118. teradataml/data/buoydata_mix.csv +11 -11
  119. teradataml/data/burst_data.csv +5 -5
  120. teradataml/data/burst_example.json +20 -20
  121. teradataml/data/byom_example.json +17 -17
  122. teradataml/data/bytes_table.csv +3 -3
  123. teradataml/data/cal_housing_ex_raw.csv +70 -70
  124. teradataml/data/callers.csv +7 -7
  125. teradataml/data/calls.csv +10 -10
  126. teradataml/data/cars_hist.csv +33 -33
  127. teradataml/data/cat_table.csv +24 -24
  128. teradataml/data/ccm_example.json +31 -31
  129. teradataml/data/ccm_input.csv +91 -91
  130. teradataml/data/ccm_input2.csv +13 -13
  131. teradataml/data/ccmexample.csv +101 -101
  132. teradataml/data/ccmprepare_example.json +8 -8
  133. teradataml/data/ccmprepare_input.csv +91 -91
  134. teradataml/data/cfilter_example.json +12 -12
  135. teradataml/data/changepointdetection_example.json +18 -18
  136. teradataml/data/changepointdetectionrt_example.json +8 -8
  137. teradataml/data/chi_sq.csv +2 -2
  138. teradataml/data/churn_data.csv +14 -14
  139. teradataml/data/churn_emission.csv +35 -35
  140. teradataml/data/churn_initial.csv +3 -3
  141. teradataml/data/churn_state_transition.csv +5 -5
  142. teradataml/data/citedges_2.csv +745 -745
  143. teradataml/data/citvertices_2.csv +1210 -1210
  144. teradataml/data/clicks2.csv +16 -16
  145. teradataml/data/clickstream.csv +12 -12
  146. teradataml/data/clickstream1.csv +11 -11
  147. teradataml/data/closeness_example.json +15 -15
  148. teradataml/data/complaints.csv +21 -21
  149. teradataml/data/complaints_mini.csv +3 -3
  150. teradataml/data/complaints_testtoken.csv +224 -224
  151. teradataml/data/complaints_tokens_test.csv +353 -353
  152. teradataml/data/complaints_traintoken.csv +472 -472
  153. teradataml/data/computers_category.csv +1001 -1001
  154. teradataml/data/computers_test1.csv +1252 -1252
  155. teradataml/data/computers_train1.csv +5009 -5009
  156. teradataml/data/computers_train1_clustered.csv +5009 -5009
  157. teradataml/data/confusionmatrix_example.json +9 -9
  158. teradataml/data/conversion_event_table.csv +3 -3
  159. teradataml/data/corr_input.csv +17 -17
  160. teradataml/data/correlation_example.json +11 -11
  161. teradataml/data/coxhazardratio_example.json +39 -39
  162. teradataml/data/coxph_example.json +15 -15
  163. teradataml/data/coxsurvival_example.json +28 -28
  164. teradataml/data/cpt.csv +41 -41
  165. teradataml/data/credit_ex_merged.csv +45 -45
  166. teradataml/data/customer_loyalty.csv +301 -301
  167. teradataml/data/customer_loyalty_newseq.csv +31 -31
  168. teradataml/data/dataframe_example.json +146 -146
  169. teradataml/data/decisionforest_example.json +37 -37
  170. teradataml/data/decisionforestpredict_example.json +38 -38
  171. teradataml/data/decisiontree_example.json +21 -21
  172. teradataml/data/decisiontreepredict_example.json +45 -45
  173. teradataml/data/dfft2_size4_real.csv +17 -17
  174. teradataml/data/dfft2_test_matrix16.csv +17 -17
  175. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  176. teradataml/data/diabetes.csv +443 -443
  177. teradataml/data/diabetes_test.csv +89 -89
  178. teradataml/data/dict_table.csv +5 -5
  179. teradataml/data/docperterm_table.csv +4 -4
  180. teradataml/data/docs/__init__.py +1 -1
  181. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  182. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  183. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  184. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  185. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  186. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  187. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  188. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  189. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  190. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  191. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  192. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  193. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  194. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  195. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  196. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  197. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  198. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  199. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  200. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  201. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  202. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  203. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  204. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  205. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  206. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  207. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  208. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  209. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +132 -132
  210. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +103 -103
  211. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  212. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +101 -101
  213. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  214. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  215. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  216. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  217. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  218. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  219. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  220. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  221. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  222. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  223. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  224. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  225. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  226. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  227. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  228. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  229. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  230. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  231. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  232. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  233. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  234. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +126 -126
  235. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  236. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  237. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  238. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  239. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  240. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  241. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  242. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  243. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +243 -243
  244. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  245. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  246. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  247. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  248. teradataml/data/docs/sqle/docs_17_20/FTest.py +160 -160
  249. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  250. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  251. teradataml/data/docs/sqle/docs_17_20/GLM.py +380 -380
  252. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  253. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  254. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  255. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +123 -123
  256. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  257. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  258. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  259. teradataml/data/docs/sqle/docs_17_20/KMeans.py +204 -204
  260. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  261. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  262. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  263. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  264. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  265. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  266. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  267. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  268. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +117 -117
  269. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  270. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  271. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  272. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  273. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +225 -225
  274. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +115 -115
  275. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  276. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  277. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  278. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  279. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  280. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  281. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  282. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  283. teradataml/data/docs/sqle/docs_17_20/ROC.py +163 -163
  284. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  285. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  286. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  287. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  288. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  289. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  290. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  291. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  292. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +202 -202
  293. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  294. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +197 -197
  295. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +110 -109
  296. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  297. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  298. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  299. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  300. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  301. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  302. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  303. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  304. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +171 -171
  305. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  306. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  307. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  308. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  309. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  310. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  311. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  312. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  313. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  314. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  315. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  316. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +353 -353
  317. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +275 -275
  318. teradataml/data/docs/sqle/docs_17_20/ZTest.py +155 -155
  319. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  320. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  321. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  322. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  323. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  324. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  325. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  326. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  327. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  328. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  329. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  330. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  331. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  332. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  333. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  334. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  335. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  336. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  337. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  338. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  339. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  340. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  341. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  342. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  343. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  344. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  345. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  346. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  347. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  348. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  349. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  350. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  351. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  352. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  353. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  354. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  355. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  356. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  357. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  358. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  359. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  360. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  361. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  362. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  363. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  364. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  365. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  366. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  367. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  368. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  369. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  370. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  371. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  372. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  373. teradataml/data/dtw_example.json +17 -17
  374. teradataml/data/dtw_t1.csv +11 -11
  375. teradataml/data/dtw_t2.csv +4 -4
  376. teradataml/data/dwt2d_example.json +15 -15
  377. teradataml/data/dwt_example.json +14 -14
  378. teradataml/data/dwt_filter_dim.csv +5 -5
  379. teradataml/data/emission.csv +9 -9
  380. teradataml/data/emp_table_by_dept.csv +19 -19
  381. teradataml/data/employee_info.csv +4 -4
  382. teradataml/data/employee_table.csv +6 -6
  383. teradataml/data/excluding_event_table.csv +2 -2
  384. teradataml/data/finance_data.csv +6 -6
  385. teradataml/data/finance_data2.csv +61 -61
  386. teradataml/data/finance_data3.csv +93 -93
  387. teradataml/data/fish.csv +160 -0
  388. teradataml/data/fm_blood2ageandweight.csv +26 -26
  389. teradataml/data/fmeasure_example.json +11 -11
  390. teradataml/data/followers_leaders.csv +10 -10
  391. teradataml/data/fpgrowth_example.json +12 -12
  392. teradataml/data/frequentpaths_example.json +29 -29
  393. teradataml/data/friends.csv +9 -9
  394. teradataml/data/fs_input.csv +33 -33
  395. teradataml/data/fs_input1.csv +33 -33
  396. teradataml/data/genData.csv +513 -513
  397. teradataml/data/geodataframe_example.json +39 -39
  398. teradataml/data/glass_types.csv +215 -0
  399. teradataml/data/glm_admissions_model.csv +12 -12
  400. teradataml/data/glm_example.json +29 -29
  401. teradataml/data/glml1l2_example.json +28 -28
  402. teradataml/data/glml1l2predict_example.json +54 -54
  403. teradataml/data/glmpredict_example.json +54 -54
  404. teradataml/data/gq_t1.csv +21 -21
  405. teradataml/data/hconvolve_complex_right.csv +5 -5
  406. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  407. teradataml/data/histogram_example.json +11 -11
  408. teradataml/data/hmmdecoder_example.json +78 -78
  409. teradataml/data/hmmevaluator_example.json +24 -24
  410. teradataml/data/hmmsupervised_example.json +10 -10
  411. teradataml/data/hmmunsupervised_example.json +7 -7
  412. teradataml/data/house_values.csv +12 -12
  413. teradataml/data/house_values2.csv +13 -13
  414. teradataml/data/housing_cat.csv +7 -7
  415. teradataml/data/housing_data.csv +9 -9
  416. teradataml/data/housing_test.csv +47 -47
  417. teradataml/data/housing_test_binary.csv +47 -47
  418. teradataml/data/housing_train.csv +493 -493
  419. teradataml/data/housing_train_attribute.csv +4 -4
  420. teradataml/data/housing_train_binary.csv +437 -437
  421. teradataml/data/housing_train_parameter.csv +2 -2
  422. teradataml/data/housing_train_response.csv +493 -493
  423. teradataml/data/ibm_stock.csv +370 -370
  424. teradataml/data/ibm_stock1.csv +370 -370
  425. teradataml/data/identitymatch_example.json +21 -21
  426. teradataml/data/idf_table.csv +4 -4
  427. teradataml/data/impressions.csv +101 -101
  428. teradataml/data/inflation.csv +21 -21
  429. teradataml/data/initial.csv +3 -3
  430. teradataml/data/insect_sprays.csv +12 -12
  431. teradataml/data/insurance.csv +1339 -1339
  432. teradataml/data/interpolator_example.json +12 -12
  433. teradataml/data/iris_altinput.csv +481 -481
  434. teradataml/data/iris_attribute_output.csv +8 -8
  435. teradataml/data/iris_attribute_test.csv +121 -121
  436. teradataml/data/iris_attribute_train.csv +481 -481
  437. teradataml/data/iris_category_expect_predict.csv +31 -31
  438. teradataml/data/iris_data.csv +151 -0
  439. teradataml/data/iris_input.csv +151 -151
  440. teradataml/data/iris_response_train.csv +121 -121
  441. teradataml/data/iris_test.csv +31 -31
  442. teradataml/data/iris_train.csv +121 -121
  443. teradataml/data/join_table1.csv +4 -4
  444. teradataml/data/join_table2.csv +4 -4
  445. teradataml/data/jsons/anly_function_name.json +6 -6
  446. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  447. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  448. teradataml/data/jsons/byom/h2opredict.json +194 -194
  449. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  450. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  451. teradataml/data/jsons/paired_functions.json +435 -435
  452. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  453. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  454. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  455. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  456. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  457. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  458. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  459. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  460. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  461. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  462. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  463. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  464. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  465. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  466. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  467. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  468. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  469. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  470. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  471. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  472. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  473. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  474. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  475. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  476. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  477. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  478. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  479. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  480. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  481. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  482. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  483. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  484. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  485. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  486. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  487. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  488. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  489. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  490. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  491. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  492. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  493. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  494. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  495. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  496. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  497. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  498. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  499. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  500. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  501. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  502. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  503. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  504. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  505. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  506. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  507. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  508. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  509. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  510. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  511. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  512. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  513. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  514. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  515. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  516. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  517. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  518. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  519. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  520. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  521. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  522. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  523. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  524. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  525. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  526. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  527. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  528. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  529. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  531. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  532. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  533. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  534. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  535. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  536. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  537. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  539. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  540. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  541. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  542. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  543. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  544. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  545. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  546. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  547. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  548. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  549. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  550. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  551. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  552. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  553. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  554. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  555. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  556. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  557. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  558. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +76 -76
  559. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  560. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  561. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  562. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  563. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  564. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  565. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  566. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  567. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  568. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  569. teradataml/data/jsons/sqle/17.20/TD_FTest.json +186 -186
  570. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  571. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  572. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  573. teradataml/data/jsons/sqle/17.20/TD_GLM.json +431 -431
  574. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +125 -125
  575. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  576. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  577. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +91 -91
  578. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  579. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  580. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  581. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +211 -211
  582. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  583. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  584. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  585. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +101 -101
  586. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  587. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  588. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  589. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  590. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  591. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  592. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  593. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  594. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  595. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  596. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  597. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  598. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  599. teradataml/data/jsons/sqle/17.20/TD_ROC.json +177 -177
  600. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  601. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  602. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  603. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  604. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  605. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  606. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  607. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  608. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +124 -124
  609. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +156 -156
  610. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +70 -70
  611. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  612. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  613. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  614. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  615. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  616. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  617. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  618. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  619. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  620. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  621. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  622. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  623. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  624. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  625. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +312 -312
  626. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +182 -182
  627. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +170 -170
  628. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  629. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  630. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  631. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  632. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  633. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  634. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  635. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  636. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  637. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  638. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  639. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  640. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  641. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  642. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  643. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  644. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  645. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  646. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  647. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  648. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  649. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  650. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  651. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  653. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  654. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  655. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  656. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  657. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  658. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  659. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  660. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  661. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  662. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  663. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  664. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  665. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  666. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  667. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  668. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  669. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  670. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  671. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  672. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  673. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  674. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  675. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  676. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  677. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  678. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  679. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  680. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  681. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  682. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  683. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  684. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  685. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  686. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  687. teradataml/data/kmeans_example.json +17 -17
  688. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  689. teradataml/data/knn_example.json +18 -18
  690. teradataml/data/knnrecommender_example.json +6 -6
  691. teradataml/data/knnrecommenderpredict_example.json +12 -12
  692. teradataml/data/lar_example.json +17 -17
  693. teradataml/data/larpredict_example.json +30 -30
  694. teradataml/data/lc_new_predictors.csv +5 -5
  695. teradataml/data/lc_new_reference.csv +9 -9
  696. teradataml/data/lda_example.json +8 -8
  697. teradataml/data/ldainference_example.json +14 -14
  698. teradataml/data/ldatopicsummary_example.json +8 -8
  699. teradataml/data/levendist_input.csv +13 -13
  700. teradataml/data/levenshteindistance_example.json +10 -10
  701. teradataml/data/linreg_example.json +9 -9
  702. teradataml/data/load_example_data.py +326 -323
  703. teradataml/data/loan_prediction.csv +295 -295
  704. teradataml/data/lungcancer.csv +138 -138
  705. teradataml/data/mappingdata.csv +12 -12
  706. teradataml/data/milk_timeseries.csv +157 -157
  707. teradataml/data/min_max_titanic.csv +4 -4
  708. teradataml/data/minhash_example.json +6 -6
  709. teradataml/data/ml_ratings.csv +7547 -7547
  710. teradataml/data/ml_ratings_10.csv +2445 -2445
  711. teradataml/data/model1_table.csv +5 -5
  712. teradataml/data/model2_table.csv +5 -5
  713. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  714. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  715. teradataml/data/modularity_example.json +12 -12
  716. teradataml/data/movavg_example.json +7 -7
  717. teradataml/data/mtx1.csv +7 -7
  718. teradataml/data/mtx2.csv +13 -13
  719. teradataml/data/multi_model_classification.csv +401 -0
  720. teradataml/data/multi_model_regression.csv +401 -0
  721. teradataml/data/mvdfft8.csv +9 -9
  722. teradataml/data/naivebayes_example.json +9 -9
  723. teradataml/data/naivebayespredict_example.json +19 -19
  724. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  725. teradataml/data/naivebayestextclassifier_example.json +8 -8
  726. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  727. teradataml/data/name_Find_configure.csv +10 -10
  728. teradataml/data/namedentityfinder_example.json +14 -14
  729. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  730. teradataml/data/namedentityfindertrainer_example.json +6 -6
  731. teradataml/data/nb_iris_input_test.csv +31 -31
  732. teradataml/data/nb_iris_input_train.csv +121 -121
  733. teradataml/data/nbp_iris_model.csv +13 -13
  734. teradataml/data/ner_extractor_text.csv +2 -2
  735. teradataml/data/ner_sports_test2.csv +29 -29
  736. teradataml/data/ner_sports_train.csv +501 -501
  737. teradataml/data/nerevaluator_example.json +5 -5
  738. teradataml/data/nerextractor_example.json +18 -18
  739. teradataml/data/nermem_sports_test.csv +17 -17
  740. teradataml/data/nermem_sports_train.csv +50 -50
  741. teradataml/data/nertrainer_example.json +6 -6
  742. teradataml/data/ngrams_example.json +6 -6
  743. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  744. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  745. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  746. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  747. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  748. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  749. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  750. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  751. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  752. teradataml/data/npath_example.json +23 -23
  753. teradataml/data/ntree_example.json +14 -14
  754. teradataml/data/numeric_strings.csv +4 -4
  755. teradataml/data/numerics.csv +4 -4
  756. teradataml/data/ocean_buoy.csv +17 -17
  757. teradataml/data/ocean_buoy2.csv +17 -17
  758. teradataml/data/ocean_buoys.csv +27 -27
  759. teradataml/data/ocean_buoys2.csv +10 -10
  760. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  761. teradataml/data/ocean_buoys_seq.csv +29 -29
  762. teradataml/data/openml_example.json +63 -0
  763. teradataml/data/optional_event_table.csv +4 -4
  764. teradataml/data/orders1.csv +11 -11
  765. teradataml/data/orders1_12.csv +12 -12
  766. teradataml/data/orders_ex.csv +4 -4
  767. teradataml/data/pack_example.json +8 -8
  768. teradataml/data/package_tracking.csv +19 -19
  769. teradataml/data/package_tracking_pti.csv +18 -18
  770. teradataml/data/pagerank_example.json +13 -13
  771. teradataml/data/paragraphs_input.csv +6 -6
  772. teradataml/data/pathanalyzer_example.json +7 -7
  773. teradataml/data/pathgenerator_example.json +7 -7
  774. teradataml/data/phrases.csv +7 -7
  775. teradataml/data/pivot_example.json +8 -8
  776. teradataml/data/pivot_input.csv +22 -22
  777. teradataml/data/playerRating.csv +31 -31
  778. teradataml/data/postagger_example.json +6 -6
  779. teradataml/data/posttagger_output.csv +44 -44
  780. teradataml/data/production_data.csv +16 -16
  781. teradataml/data/production_data2.csv +7 -7
  782. teradataml/data/randomsample_example.json +31 -31
  783. teradataml/data/randomwalksample_example.json +8 -8
  784. teradataml/data/rank_table.csv +6 -6
  785. teradataml/data/ref_mobile_data.csv +4 -4
  786. teradataml/data/ref_mobile_data_dense.csv +2 -2
  787. teradataml/data/ref_url.csv +17 -17
  788. teradataml/data/restaurant_reviews.csv +7 -7
  789. teradataml/data/river_data.csv +145 -145
  790. teradataml/data/roc_example.json +7 -7
  791. teradataml/data/roc_input.csv +101 -101
  792. teradataml/data/rule_inputs.csv +6 -6
  793. teradataml/data/rule_table.csv +2 -2
  794. teradataml/data/sales.csv +7 -7
  795. teradataml/data/sales_transaction.csv +501 -501
  796. teradataml/data/salesdata.csv +342 -342
  797. teradataml/data/sample_cities.csv +2 -2
  798. teradataml/data/sample_shapes.csv +10 -10
  799. teradataml/data/sample_streets.csv +2 -2
  800. teradataml/data/sampling_example.json +15 -15
  801. teradataml/data/sax_example.json +8 -8
  802. teradataml/data/scale_example.json +23 -23
  803. teradataml/data/scale_housing.csv +11 -11
  804. teradataml/data/scale_housing_test.csv +6 -6
  805. teradataml/data/scale_stat.csv +11 -11
  806. teradataml/data/scalebypartition_example.json +13 -13
  807. teradataml/data/scalemap_example.json +13 -13
  808. teradataml/data/scalesummary_example.json +12 -12
  809. teradataml/data/score_category.csv +101 -101
  810. teradataml/data/score_summary.csv +4 -4
  811. teradataml/data/script_example.json +9 -9
  812. teradataml/data/scripts/deploy_script.py +65 -0
  813. teradataml/data/scripts/mapper.R +20 -0
  814. teradataml/data/scripts/mapper.py +15 -15
  815. teradataml/data/scripts/mapper_replace.py +15 -15
  816. teradataml/data/scripts/sklearn/__init__.py +0 -0
  817. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  818. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  819. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  820. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  821. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  822. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  823. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  824. teradataml/data/seeds.csv +10 -10
  825. teradataml/data/sentenceextractor_example.json +6 -6
  826. teradataml/data/sentiment_extract_input.csv +11 -11
  827. teradataml/data/sentiment_train.csv +16 -16
  828. teradataml/data/sentiment_word.csv +20 -20
  829. teradataml/data/sentiment_word_input.csv +19 -19
  830. teradataml/data/sentimentextractor_example.json +24 -24
  831. teradataml/data/sentimenttrainer_example.json +8 -8
  832. teradataml/data/sequence_table.csv +10 -10
  833. teradataml/data/seriessplitter_example.json +7 -7
  834. teradataml/data/sessionize_example.json +17 -17
  835. teradataml/data/sessionize_table.csv +116 -116
  836. teradataml/data/setop_test1.csv +24 -24
  837. teradataml/data/setop_test2.csv +22 -22
  838. teradataml/data/soc_nw_edges.csv +10 -10
  839. teradataml/data/soc_nw_vertices.csv +7 -7
  840. teradataml/data/souvenir_timeseries.csv +167 -167
  841. teradataml/data/sparse_iris_attribute.csv +5 -5
  842. teradataml/data/sparse_iris_test.csv +121 -121
  843. teradataml/data/sparse_iris_train.csv +601 -601
  844. teradataml/data/star1.csv +6 -6
  845. teradataml/data/state_transition.csv +5 -5
  846. teradataml/data/stock_data.csv +53 -53
  847. teradataml/data/stock_movement.csv +11 -11
  848. teradataml/data/stock_vol.csv +76 -76
  849. teradataml/data/stop_words.csv +8 -8
  850. teradataml/data/store_sales.csv +37 -37
  851. teradataml/data/stringsimilarity_example.json +7 -7
  852. teradataml/data/strsimilarity_input.csv +13 -13
  853. teradataml/data/students.csv +101 -101
  854. teradataml/data/svm_iris_input_test.csv +121 -121
  855. teradataml/data/svm_iris_input_train.csv +481 -481
  856. teradataml/data/svm_iris_model.csv +7 -7
  857. teradataml/data/svmdense_example.json +9 -9
  858. teradataml/data/svmdensepredict_example.json +18 -18
  859. teradataml/data/svmsparse_example.json +7 -7
  860. teradataml/data/svmsparsepredict_example.json +13 -13
  861. teradataml/data/svmsparsesummary_example.json +7 -7
  862. teradataml/data/target_mobile_data.csv +13 -13
  863. teradataml/data/target_mobile_data_dense.csv +5 -5
  864. teradataml/data/templatedata.csv +1201 -1201
  865. teradataml/data/templates/open_source_ml.json +9 -0
  866. teradataml/data/teradataml_example.json +73 -1
  867. teradataml/data/test_classification.csv +101 -0
  868. teradataml/data/test_loan_prediction.csv +53 -53
  869. teradataml/data/test_pacf_12.csv +37 -37
  870. teradataml/data/test_prediction.csv +101 -0
  871. teradataml/data/test_regression.csv +101 -0
  872. teradataml/data/test_river2.csv +109 -109
  873. teradataml/data/text_inputs.csv +6 -6
  874. teradataml/data/textchunker_example.json +7 -7
  875. teradataml/data/textclassifier_example.json +6 -6
  876. teradataml/data/textclassifier_input.csv +7 -7
  877. teradataml/data/textclassifiertrainer_example.json +6 -6
  878. teradataml/data/textmorph_example.json +5 -5
  879. teradataml/data/textparser_example.json +15 -15
  880. teradataml/data/texttagger_example.json +11 -11
  881. teradataml/data/texttokenizer_example.json +6 -6
  882. teradataml/data/texttrainer_input.csv +11 -11
  883. teradataml/data/tf_example.json +6 -6
  884. teradataml/data/tfidf_example.json +13 -13
  885. teradataml/data/tfidf_input1.csv +201 -201
  886. teradataml/data/tfidf_train.csv +6 -6
  887. teradataml/data/time_table1.csv +535 -535
  888. teradataml/data/time_table2.csv +14 -14
  889. teradataml/data/timeseriesdata.csv +1601 -1601
  890. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  891. teradataml/data/titanic.csv +892 -892
  892. teradataml/data/token_table.csv +696 -696
  893. teradataml/data/train_multiclass.csv +101 -0
  894. teradataml/data/train_regression.csv +101 -0
  895. teradataml/data/train_regression_multiple_labels.csv +101 -0
  896. teradataml/data/train_tracking.csv +27 -27
  897. teradataml/data/transformation_table.csv +5 -5
  898. teradataml/data/transformation_table_new.csv +1 -1
  899. teradataml/data/tv_spots.csv +16 -16
  900. teradataml/data/twod_climate_data.csv +117 -117
  901. teradataml/data/uaf_example.json +475 -475
  902. teradataml/data/univariatestatistics_example.json +8 -8
  903. teradataml/data/unpack_example.json +9 -9
  904. teradataml/data/unpivot_example.json +9 -9
  905. teradataml/data/unpivot_input.csv +8 -8
  906. teradataml/data/us_air_pass.csv +36 -36
  907. teradataml/data/us_population.csv +624 -624
  908. teradataml/data/us_states_shapes.csv +52 -52
  909. teradataml/data/varmax_example.json +17 -17
  910. teradataml/data/vectordistance_example.json +25 -25
  911. teradataml/data/ville_climatedata.csv +121 -121
  912. teradataml/data/ville_tempdata.csv +12 -12
  913. teradataml/data/ville_tempdata1.csv +12 -12
  914. teradataml/data/ville_temperature.csv +11 -11
  915. teradataml/data/waveletTable.csv +1605 -1605
  916. teradataml/data/waveletTable2.csv +1605 -1605
  917. teradataml/data/weightedmovavg_example.json +8 -8
  918. teradataml/data/wft_testing.csv +5 -5
  919. teradataml/data/wine_data.csv +1600 -0
  920. teradataml/data/word_embed_input_table1.csv +5 -5
  921. teradataml/data/word_embed_input_table2.csv +4 -4
  922. teradataml/data/word_embed_model.csv +22 -22
  923. teradataml/data/words_input.csv +13 -13
  924. teradataml/data/xconvolve_complex_left.csv +6 -6
  925. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  926. teradataml/data/xgboost_example.json +35 -35
  927. teradataml/data/xgboostpredict_example.json +31 -31
  928. teradataml/dataframe/copy_to.py +1764 -1698
  929. teradataml/dataframe/data_transfer.py +2753 -2745
  930. teradataml/dataframe/dataframe.py +17545 -16946
  931. teradataml/dataframe/dataframe_utils.py +1837 -1740
  932. teradataml/dataframe/fastload.py +611 -603
  933. teradataml/dataframe/indexer.py +424 -424
  934. teradataml/dataframe/setop.py +1179 -1166
  935. teradataml/dataframe/sql.py +10090 -6432
  936. teradataml/dataframe/sql_function_parameters.py +439 -388
  937. teradataml/dataframe/sql_functions.py +652 -652
  938. teradataml/dataframe/sql_interfaces.py +220 -220
  939. teradataml/dataframe/vantage_function_types.py +674 -630
  940. teradataml/dataframe/window.py +693 -692
  941. teradataml/dbutils/__init__.py +3 -3
  942. teradataml/dbutils/dbutils.py +1167 -1150
  943. teradataml/dbutils/filemgr.py +267 -267
  944. teradataml/gen_ai/__init__.py +2 -2
  945. teradataml/gen_ai/convAI.py +472 -472
  946. teradataml/geospatial/__init__.py +3 -3
  947. teradataml/geospatial/geodataframe.py +1105 -1094
  948. teradataml/geospatial/geodataframecolumn.py +392 -387
  949. teradataml/geospatial/geometry_types.py +925 -925
  950. teradataml/hyperparameter_tuner/__init__.py +1 -1
  951. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  952. teradataml/hyperparameter_tuner/utils.py +281 -187
  953. teradataml/lib/aed_0_1.dll +0 -0
  954. teradataml/lib/libaed_0_1.dylib +0 -0
  955. teradataml/lib/libaed_0_1.so +0 -0
  956. teradataml/libaed_0_1.dylib +0 -0
  957. teradataml/libaed_0_1.so +0 -0
  958. teradataml/opensource/__init__.py +1 -0
  959. teradataml/opensource/sklearn/__init__.py +1 -0
  960. teradataml/opensource/sklearn/_class.py +255 -0
  961. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  962. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  963. teradataml/opensource/sklearn/constants.py +54 -0
  964. teradataml/options/__init__.py +121 -124
  965. teradataml/options/configure.py +337 -336
  966. teradataml/options/display.py +176 -176
  967. teradataml/plot/__init__.py +2 -2
  968. teradataml/plot/axis.py +1388 -1388
  969. teradataml/plot/constants.py +15 -15
  970. teradataml/plot/figure.py +398 -398
  971. teradataml/plot/plot.py +760 -760
  972. teradataml/plot/query_generator.py +83 -83
  973. teradataml/plot/subplot.py +216 -216
  974. teradataml/scriptmgmt/UserEnv.py +3788 -3761
  975. teradataml/scriptmgmt/__init__.py +3 -3
  976. teradataml/scriptmgmt/lls_utils.py +1616 -1604
  977. teradataml/series/series.py +532 -532
  978. teradataml/series/series_utils.py +71 -71
  979. teradataml/table_operators/Apply.py +949 -917
  980. teradataml/table_operators/Script.py +1719 -1982
  981. teradataml/table_operators/TableOperator.py +1207 -1616
  982. teradataml/table_operators/__init__.py +2 -3
  983. teradataml/table_operators/apply_query_generator.py +262 -262
  984. teradataml/table_operators/query_generator.py +507 -507
  985. teradataml/table_operators/table_operator_query_generator.py +460 -460
  986. teradataml/table_operators/table_operator_util.py +631 -639
  987. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  988. teradataml/table_operators/templates/dataframe_map.template +176 -176
  989. teradataml/table_operators/templates/script_executor.template +170 -170
  990. teradataml/utils/dtypes.py +684 -684
  991. teradataml/utils/internal_buffer.py +84 -84
  992. teradataml/utils/print_versions.py +205 -205
  993. teradataml/utils/utils.py +410 -410
  994. teradataml/utils/validators.py +2239 -2115
  995. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +270 -41
  996. teradataml-20.0.0.0.dist-info/RECORD +1038 -0
  997. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +1 -1
  998. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +1 -1
  999. teradataml/analytics/mle/AdaBoost.py +0 -651
  1000. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1001. teradataml/analytics/mle/Antiselect.py +0 -342
  1002. teradataml/analytics/mle/Arima.py +0 -641
  1003. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1004. teradataml/analytics/mle/Attribution.py +0 -1070
  1005. teradataml/analytics/mle/Betweenness.py +0 -658
  1006. teradataml/analytics/mle/Burst.py +0 -711
  1007. teradataml/analytics/mle/CCM.py +0 -600
  1008. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1009. teradataml/analytics/mle/CFilter.py +0 -460
  1010. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1011. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1012. teradataml/analytics/mle/Closeness.py +0 -737
  1013. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1014. teradataml/analytics/mle/Correlation.py +0 -477
  1015. teradataml/analytics/mle/Correlation2.py +0 -573
  1016. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1017. teradataml/analytics/mle/CoxPH.py +0 -556
  1018. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1019. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1020. teradataml/analytics/mle/DTW.py +0 -623
  1021. teradataml/analytics/mle/DWT.py +0 -564
  1022. teradataml/analytics/mle/DWT2D.py +0 -599
  1023. teradataml/analytics/mle/DecisionForest.py +0 -716
  1024. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1025. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1026. teradataml/analytics/mle/DecisionTree.py +0 -830
  1027. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1028. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1029. teradataml/analytics/mle/FMeasure.py +0 -402
  1030. teradataml/analytics/mle/FPGrowth.py +0 -734
  1031. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1032. teradataml/analytics/mle/GLM.py +0 -558
  1033. teradataml/analytics/mle/GLML1L2.py +0 -547
  1034. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1035. teradataml/analytics/mle/GLMPredict.py +0 -529
  1036. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1037. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1038. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1039. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1040. teradataml/analytics/mle/Histogram.py +0 -561
  1041. teradataml/analytics/mle/IDWT.py +0 -476
  1042. teradataml/analytics/mle/IDWT2D.py +0 -493
  1043. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1044. teradataml/analytics/mle/Interpolator.py +0 -918
  1045. teradataml/analytics/mle/KMeans.py +0 -485
  1046. teradataml/analytics/mle/KNN.py +0 -627
  1047. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1048. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1049. teradataml/analytics/mle/LAR.py +0 -439
  1050. teradataml/analytics/mle/LARPredict.py +0 -478
  1051. teradataml/analytics/mle/LDA.py +0 -548
  1052. teradataml/analytics/mle/LDAInference.py +0 -492
  1053. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1054. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1055. teradataml/analytics/mle/LinReg.py +0 -433
  1056. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1057. teradataml/analytics/mle/MinHash.py +0 -544
  1058. teradataml/analytics/mle/Modularity.py +0 -587
  1059. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1060. teradataml/analytics/mle/NERExtractor.py +0 -595
  1061. teradataml/analytics/mle/NERTrainer.py +0 -458
  1062. teradataml/analytics/mle/NGrams.py +0 -570
  1063. teradataml/analytics/mle/NPath.py +0 -634
  1064. teradataml/analytics/mle/NTree.py +0 -549
  1065. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1066. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1067. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1068. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1069. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1070. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1071. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1072. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1073. teradataml/analytics/mle/POSTagger.py +0 -417
  1074. teradataml/analytics/mle/Pack.py +0 -411
  1075. teradataml/analytics/mle/PageRank.py +0 -535
  1076. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1077. teradataml/analytics/mle/PathGenerator.py +0 -367
  1078. teradataml/analytics/mle/PathStart.py +0 -464
  1079. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1080. teradataml/analytics/mle/Pivot.py +0 -471
  1081. teradataml/analytics/mle/ROC.py +0 -425
  1082. teradataml/analytics/mle/RandomSample.py +0 -637
  1083. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1084. teradataml/analytics/mle/SAX.py +0 -779
  1085. teradataml/analytics/mle/SVMDense.py +0 -677
  1086. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1087. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1088. teradataml/analytics/mle/SVMSparse.py +0 -557
  1089. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1090. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1091. teradataml/analytics/mle/Sampling.py +0 -549
  1092. teradataml/analytics/mle/Scale.py +0 -565
  1093. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1094. teradataml/analytics/mle/ScaleMap.py +0 -378
  1095. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1096. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1097. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1098. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1099. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1100. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1101. teradataml/analytics/mle/Sessionize.py +0 -475
  1102. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1103. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1104. teradataml/analytics/mle/TF.py +0 -389
  1105. teradataml/analytics/mle/TFIDF.py +0 -504
  1106. teradataml/analytics/mle/TextChunker.py +0 -414
  1107. teradataml/analytics/mle/TextClassifier.py +0 -399
  1108. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1109. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1110. teradataml/analytics/mle/TextMorph.py +0 -494
  1111. teradataml/analytics/mle/TextParser.py +0 -623
  1112. teradataml/analytics/mle/TextTagger.py +0 -530
  1113. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1114. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1115. teradataml/analytics/mle/Unpack.py +0 -526
  1116. teradataml/analytics/mle/Unpivot.py +0 -438
  1117. teradataml/analytics/mle/VarMax.py +0 -776
  1118. teradataml/analytics/mle/VectorDistance.py +0 -762
  1119. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1120. teradataml/analytics/mle/XGBoost.py +0 -842
  1121. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1122. teradataml/analytics/mle/__init__.py +0 -123
  1123. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1124. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1125. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1126. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1127. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1128. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1129. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1130. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1131. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1132. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1133. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1134. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1135. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1136. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1137. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1138. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1139. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1140. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1141. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1142. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1143. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1144. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1145. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1146. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1147. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1148. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1149. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1150. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1151. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1152. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1153. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1154. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1155. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1156. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1157. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1158. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1159. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1160. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1161. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1162. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1163. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1164. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1165. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1166. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1167. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1168. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1169. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1170. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1171. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1172. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1173. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1174. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1175. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1176. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1177. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1178. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1179. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1180. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1181. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1182. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1183. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1184. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1185. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1186. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1187. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1188. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1189. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1190. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1191. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1192. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1193. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1194. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1195. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1196. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1197. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1198. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1199. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1200. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1201. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1202. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1203. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1204. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1205. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1206. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1207. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1208. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1209. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1210. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1211. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1212. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1213. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1214. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1215. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1216. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1217. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1218. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1219. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1220. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1221. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1222. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1223. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1224. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1225. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1226. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1227. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1228. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1229. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1230. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1231. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1232. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1233. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1234. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1235. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1236. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1237. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1238. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1239. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1240. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1241. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1242. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1243. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1244. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1245. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1246. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1247. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1248. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1249. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1250. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1251. teradataml/analytics/sqle/Antiselect.py +0 -321
  1252. teradataml/analytics/sqle/Attribution.py +0 -603
  1253. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1254. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1255. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1256. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1257. teradataml/analytics/sqle/NPath.py +0 -632
  1258. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1259. teradataml/analytics/sqle/Pack.py +0 -388
  1260. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1261. teradataml/analytics/sqle/Sessionize.py +0 -390
  1262. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1263. teradataml/analytics/sqle/Unpack.py +0 -503
  1264. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1265. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1266. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1267. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1268. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1269. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1270. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1271. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1272. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1273. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1274. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1275. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1276. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1277. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1278. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1279. teradataml/catalog/model_cataloging.py +0 -980
  1280. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1281. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1282. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1283. teradataml/table_operators/sandbox_container_util.py +0 -643
  1284. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1285. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
@@ -1,1740 +1,1837 @@
- # -*- coding: utf-8 -*-
- """
-
- Unpublished work.
- Copyright (c) 2018 by Teradata Corporation. All rights reserved.
- TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
-
- Primary Owner: mark.sandan@teradata.com
- Secondary Owner:
-
- This file implements utility functions for DataFrame.
- """
-
- import numbers
- import pandas as pd
- from collections import OrderedDict
-
- from teradataml.common.utils import UtilFuncs
- from teradataml.common.aed_utils import AedUtils
- from teradataml.common.constants import AEDConstants, PTITableConstants, \
-     SQLPattern, PythonTypes
- from teradataml.common.sqlbundle import SQLBundle
- from teradataml.common.exceptions import TeradataMlException
- from teradataml.common.messages import Messages
- from teradataml.common.messagecodes import MessageCodes
-
- from teradataml.context.context import get_context, get_connection
- from teradataml.context.context import _get_current_databasename
- from teradataml.dbutils.dbutils import _execute_query_and_generate_pandas_df
-
- from teradataml.options.display import display
- from teradataml.options.configure import configure
- from teradataml.utils.utils import execute_sql
-
- from teradatasqlalchemy.types import FLOAT, NUMBER, DECIMAL, PERIOD_TIMESTAMP
- from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
- import teradataml.dataframe as tdmldf
-
- from sqlalchemy.sql import select
- from sqlalchemy.sql.expression import text
- from sqlalchemy import table, column, func
- from datetime import datetime, date, time
- from decimal import Decimal
-
- # TODO - Need to write unit test cases for these functions.
- class DataFrameUtils():
-
-     @staticmethod
-     def _execute_node_return_db_object_name(nodeid, metaexpression=None):
-         """
-         Fetches queries and view names from the AED node and creates views from the queries.
-         Additionally inspects the metaexpression for consistency.
-
-         PARAMETERS:
-             nodeid: nodeid to execute
-             metaexpression: (optional) updated _metaexpr to validate
-
-         EXAMPLES:
-             _execute_node_return_db_object_name(nodeid)
-             _execute_node_return_db_object_name(nodeid, metaexpr)
-
-         RETURNS:
-             Top level view name.
-
-         """
-         aed_obj = AedUtils()
-         if not aed_obj._aed_is_node_executed(nodeid):
-
-             view_query_node_type_list = aed_obj._aed_get_exec_query(nodeid)
-             view_names, queries, node_query_types, node_ids = view_query_node_type_list
-
-             # Executing nodes / creating views.
-             for index in range(len(queries) - 1, -1, -1):
-                 is_persist = False
-                 if metaexpression and metaexpression._is_persist:
-                     is_persist = True
-
-                 try:
-                     if node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_ML_QUERY_MULTI_OUTPUT.value or \
-                        ("OUT TABLE " in queries[index] and SQLPattern.SQLMR.value.match(queries[index])) or \
-                        is_persist:
-                         # TODO:: OR condition in above needs to be removed once AED support is added.
-                         UtilFuncs._create_table(view_names[index], queries[index])
-
-                     elif node_query_types[index] in ['groupby', 'groupbytime']:
-                         # If query_type is either groupby or groupbytime, get its parent
-                         # nodeid and execute queries for the same.
-                         parent_nodeid = aed_obj._aed_get_parent_nodeids(nodeid)[0]
-                         DataFrameUtils._execute_node_return_db_object_name(parent_nodeid)
-
-                     elif node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_REFERENCE.value:
-                         # Reference nodes - to be ignored.
-                         pass
-
-                     else:
-                         UtilFuncs._create_view(view_names[index], queries[index])
-
-                     # Updating node status for the executed node.
-                     aed_obj._aed_update_node_state_single(node_ids[index], AEDConstants.AED_NODE_EXECUTED.value)
-
-                 except Exception as emsg:
-                     # TODO:: Append node execution details to emsg.
-                     # Node description, such as nodeType or node operation, should be added
-                     # here in 'emsg' to give more information about where exactly
-                     # node execution failed.
-                     raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(emsg)),
-                                               MessageCodes.TDMLDF_EXEC_SQL_FAILED)
-
-         # Setting the new table name retrieved to the teradataml DataFrame.
-         result_table_view_name = aed_obj._aed_get_tablename(nodeid)
-         # Validate the metaexpression.
-         if configure._validate_metaexpression:
-             DataFrameUtils._validate_metaexpression(result_table_view_name, metaexpression)
-
-         return result_table_view_name
-
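
The loop above walks the query list in reverse so that dependency objects are created before the views that reference them. A minimal standalone sketch of that dependency-ordered execution, using hypothetical names (plans, run_ddl) rather than the AED internals:

    # Hedged sketch: executes a stack of (name, query) pairs dependency-first.
    # 'plans' and 'run_ddl' are hypothetical stand-ins for the AED query list
    # and UtilFuncs._create_view/_create_table; they are not part of teradataml.
    def run_plan(plans, run_ddl):
        # plans[0] is the top-level object; its dependencies follow it,
        # so iterate from the end of the list back to the front.
        for name, query in reversed(plans):
            run_ddl("CREATE VIEW {} AS {}".format(name, query))
        return plans[0][0]  # the top-level view name

    top = run_plan([("v2", "SELECT * FROM v1"), ("v1", "SELECT 1 AS c")], print)
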
-     @staticmethod
-     def _validate_metaexpression(result_table_view_name, metaexpression):
-         """
-         Inspects the metaexpression for consistency with the underlying table/view.
-
-         PARAMETERS:
-             result_table_view_name: a string representing the table/view name to check column metadata
-             metaexpression: the metaexpr of the DataFrame to compare against the result_table_view_name
-
-         EXAMPLES:
-             _validate_metaexpression('t1', df._metaexpr)
-
-         RETURNS:
-             None
-             Raises a RuntimeError if mismatches are found.
-
-         """
-         # metaexpression should have already been updated
-         if metaexpression is not None:
-
-             name = lambda x: x[0]
-             type_ = lambda x: x[1]
-
-             # Compare sorted by column name.
-             df = sorted(UtilFuncs._describe_column(DataFrameUtils._get_metadata_from_table(result_table_view_name)), key=lambda x: x[0])
-             meta = sorted(metaexpression.c, key=lambda x: x.name)
-
-             # Check length.
-             if len(df) == len(meta):
-                 for i in range(len(df)):
-
-                     # Map the Teradata type to a Python type.
-                     meta_type = UtilFuncs._teradata_type_to_python_type(meta[i].type)
-
-                     # Compare column names and types.
-                     if meta[i].name != name(df[i]) or meta_type != type_(df[i]):
-                         err_msg = "[Mismatch when checking %s]\n\t[Table/View] %s %s\n\t[MetaExpression] %s %s (mapped from => %s)\n"
-                         raise RuntimeError(err_msg % (result_table_view_name,
-                                                       name(df[i]), type_(df[i]),
-                                                       meta[i].name, meta_type, meta[i].type))
-             else:
-                 err_msg = "[Length mismatch when checking %s]\nSource Table/View has length %s but MetaExpression has length %s"
-                 raise RuntimeError(err_msg % (result_table_view_name, len(df), len(meta)))
-
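
Conceptually the check pairs up the two column lists after sorting by name and compares entry by entry. A small self-contained sketch of the same comparison over (name, type) tuples:

    # Hedged sketch of the consistency check: both sides reduced to sorted
    # (column_name, python_type_name) pairs before comparison.
    def check_columns(db_cols, meta_cols):
        db_cols, meta_cols = sorted(db_cols), sorted(meta_cols)
        if len(db_cols) != len(meta_cols):
            raise RuntimeError("Length mismatch: %d vs %d" % (len(db_cols), len(meta_cols)))
        for (db_name, db_type), (m_name, m_type) in zip(db_cols, meta_cols):
            if db_name != m_name or db_type != m_type:
                raise RuntimeError("Mismatch: %s %s vs %s %s" % (db_name, db_type, m_name, m_type))

    check_columns([("a", "int"), ("b", "str")], [("b", "str"), ("a", "int")])
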
-     @staticmethod
-     def _get_dataframe_print_string(table_name, index_label, orderby=None, undropped_index=None):
-         """
-         Builds the string output for a teradataml DataFrame.
-
-         PARAMETERS:
-             table_name - Name of the database table to read from.
-             index_label - String/List specifying the column(s) to use as index.
-             orderby - Order expression to sort the returned rows.
-
-         EXAMPLES:
-             _get_dataframe_print_string('table_name', None, None)
-
-         RETURNS:
-             String representation of a pandas DataFrame.
-
-         """
-         read_query = SQLBundle._build_top_n_print_query(table_name, display.max_rows, orderby)
-
-         if index_label is not None:
-             pandas_df = _execute_query_and_generate_pandas_df(read_query, index=index_label)
-         else:
-             pandas_df = _execute_query_and_generate_pandas_df(read_query)
-
-         return pandas_df.to_string()
-
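
The print path builds a TOP-n query, materializes it through pandas, and defers all formatting to pandas itself. A rough standalone equivalent, assuming a generic DB-API connection object rather than the teradataml context:

    import pandas as pd

    # Hedged sketch: mirrors the TOP-n print query that SQLBundle builds.
    def print_table(connection, table_name, max_rows=10, index_label=None):
        read_query = "SELECT TOP {} * FROM {}".format(max_rows, table_name)
        pandas_df = pd.read_sql(read_query, connection)
        if index_label is not None:
            pandas_df = pandas_df.set_index(index_label)
        return pandas_df.to_string()
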
-     @staticmethod
-     def _get_pprint_dtypes(column_names_and_types, null_count=False):
-         """
-         Returns a string containing the column names and types.
-         If null_count is True, the string also contains
-         the number of non-null values for each column.
-
-         PARAMETERS:
-             column_names_and_types - List of column names and types.
-             null_count (optional) - When True, each entry of column_names_and_types also
-                                     carries the non-null count for the column, which is
-                                     included in the output.
-
-         EXAMPLES:
-             >>> print(_get_pprint_dtypes(column_names_and_types))
-             accounts      str
-             Feb         float
-             Jan           int
-             Mar           int
-             Apr           int
-             datetime      str
-
-             >>> print(_get_pprint_dtypes(column_names_and_types, null_count))
-             accounts    3 non-null str
-             Feb         3 non-null float
-             Jan         3 non-null int
-             Mar         3 non-null int
-             Apr         3 non-null int
-             datetime    3 non-null str
-
-         RAISES:
-
-         """
-
-         col_names = [i[0] for i in column_names_and_types]
-         col_types = [i[1] for i in column_names_and_types]
-         max_col_names = len(max(col_names, key=len)) + 4
-         max_col_types = len(max(col_types, key=len))
-         dtypes_string = ""
-         if not null_count:
-             for colname, coltype in column_names_and_types:
-                 dtypes_string += "{0: <{name_width}}{1: >{type_width}}\n".format(colname, coltype,
-                                                                                 name_width=max_col_names,
-                                                                                 type_width=max_col_types)
-         else:
-             null_count = [i[2] for i in column_names_and_types]
-             max_null_count = len(str(max(null_count, key=len)))
-             for colname, coltype, num_nulls in column_names_and_types:
-                 dtypes_string += "{0: <{name_width}}{1: <{count_width}} non-null {2: <{type_width}}\n".format(colname,
-                                                                                                              num_nulls,
-                                                                                                              coltype,
-                                                                                                              name_width=max_col_names,
-                                                                                                              count_width=max_null_count,
-                                                                                                              type_width=max_col_types)
-         # Remove the last newline character.
-         dtypes_string = dtypes_string[:-1]
-         return dtypes_string
-
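
The column widths are derived from the longest name (plus padding) and the longest type string. A compact, runnable sketch of the same padding logic:

    # Hedged sketch of the dtype pretty-printer's width computation.
    def pprint_dtypes(names_and_types):
        name_width = max(len(n) for n, _ in names_and_types) + 4
        type_width = max(len(t) for _, t in names_and_types)
        lines = ["{0: <{nw}}{1: >{tw}}".format(n, t, nw=name_width, tw=type_width)
                 for n, t in names_and_types]
        return "\n".join(lines)

    print(pprint_dtypes([("accounts", "str"), ("Feb", "float"), ("Jan", "int")]))
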
-     @staticmethod
-     def _get_metadata_from_table(table_name):
-         """
-         Retrieves column metadata by executing a HELP COLUMN command.
-
-         PARAMETERS:
-             table_name - The table name or view name.
-
-         RETURNS:
-             The result set (column information) from HELP COLUMN.
-
-         RAISES:
-             Database error if an error occurred while executing the HELP COLUMN command.
-
-         EXAMPLES:
-             df = DataFrame.from_table('mytab')
-             metadata = _get_metadata_from_table(df._table_name)
-         """
-         # Construct the HELP COLUMN command.
-         help_col_sql = SQLBundle._build_help_column(table_name)
-         # Execute the HELP COLUMN command.
-         return UtilFuncs._execute_query(help_col_sql)
-
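
From the driver's point of view, HELP COLUMN is an ordinary statement. A minimal sketch with a generic DB-API cursor, independent of SQLBundle:

    # Hedged sketch: HELP COLUMN executed like any other statement.
    def help_column(cursor, table_name):
        cursor.execute("HELP COLUMN {}.*".format(table_name))
        return cursor.fetchall()
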
-     @staticmethod
-     def _extract_select_string(select_expression):
-         """
-         Takes in a string/list representing a Pandas selection clause of any of the forms (only):
-             a) "col1" or 'col1'
-             b) ["col 1"] or ['col 1']
-             c) ["col1", "col2", "col3"] or ['col1', 'col2', 'col3']
-             d) [['col1', 'col2', 'col3']] or [["col1", "col2", "col3"]]
-
-         And returns a list with column strings representing the selection of the form:
-             a) ['col1']
-             b) ['col 1']
-             c) ['col1','col2','col3']
-             d) ['col1','col2','col3']
-
-         Column names ("col1", "col2", ...) are strings representing database table columns.
-         All standard Teradata data types for columns are supported: INTEGER, VARCHAR(5), FLOAT.
-
-         PARAMETERS:
-             select_expression - Expression representing column selection.
-                                 Type - String or List of Strings or List of List (single level only)
-                                 Required - Yes
-
-         EXAMPLES:
-             UtilFuncs._extract_select_string([['col1', 'col2']])
-             UtilFuncs._extract_select_string("col1")
-             UtilFuncs._extract_select_string(["col1"])
-             UtilFuncs._extract_select_string(["col1","col2","col3"])
-
-         RETURNS:
-             List of Strings representing column names.
-
-         RAISES:
-             TeradataMlException
-         """
-         tdp = preparer(td_dialect)
-         column_list = []
-
-         # Single string column.
-         if isinstance(select_expression, str):
-             # Error handling - empty string.
-             if select_expression == "":
-                 raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
-                                           MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
-             else:
-                 column_list.append(tdp.quote("{0}".format(select_expression.strip())))
-
-         # Error handling - [], [""], [None], ["None"], ['col1', None], ['col1', '']
-         elif isinstance(select_expression, list) and (len(select_expression) == 0 or
-                 any(element in [None, "None", ""] for element in select_expression)):
-             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
-                                       MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
-
-         # List - ["col1"] or ["col1", "col2", "col3"]
-         elif isinstance(select_expression, list) and all(isinstance(element, str) for element in select_expression):
-             if len(select_expression) == 1:
-                 column_list.append(tdp.quote("{0}".format(select_expression[0].strip())))
-             else:
-                 column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression]
-
-         # List of list (single level only - Pandas syntax) - [["col1", "col2", "col3"]]
-         elif isinstance(select_expression, list) and isinstance(select_expression[0], list):
-             # Error handling - [[]], [[""]], [[None]], [['col1', None]], [['col1', "None"]], ["col1", ""]
-             if len(select_expression[0]) == 0 or any(element in [None, "None", ""] for element in select_expression[0]):
-                 raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
-                                           MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
-
-             else:
-                 column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression[0]]
-
-         # Any other format - raise a format exception.
-         else:
-             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_FORMAT),
-                                       MessageCodes.TDMLDF_SELECT_INVALID_FORMAT)
-         return column_list
-
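
Seen from the outside, the function normalizes the four accepted shapes into one flat list of quoted names. A standalone sketch of that normalization, with plain double-quote quoting standing in for the dialect preparer:

    # Hedged sketch: flattens str / list / [[...]] into a list of quoted names.
    def extract_select(expr):
        quote = lambda c: '"{}"'.format(c.strip())
        if isinstance(expr, str):
            cols = [expr]
        elif isinstance(expr, list) and expr and isinstance(expr[0], list):
            cols = expr[0]                       # Pandas-style [["c1", "c2"]]
        elif isinstance(expr, list):
            cols = expr
        else:
            raise ValueError("Unsupported selection format")
        if not cols or any(c in (None, "None", "") for c in cols):
            raise ValueError("Empty or None column selection")
        return [quote(c) for c in cols]

    extract_select([["col1", "col2"]])   # ['"col1"', '"col2"']
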
-     @staticmethod
-     def _get_primary_index_from_table(table_name):
-         """
-         Retrieves the primary index by executing a HELP INDEX command.
-
-         PARAMETERS:
-             table_name - The table name or volatile table name.
-
-         RETURNS:
-             Returns a list containing the primary index columns from HELP INDEX.
-             If there is no primary index (NoPI table), returns None.
-
-         RAISES:
-             Database error if an error occurred while executing the HELP INDEX command.
-
-         EXAMPLES:
-             df = DataFrame.from_table('mytab')
-             index_labels = DataFrameUtils._get_primary_index_from_table(df._table_name)
-         """
-         # Construct the HELP INDEX command.
-         help_index_sql = SQLBundle._build_help_index(table_name)
-
-         # Execute the HELP INDEX command.
-         rows = UtilFuncs._execute_query(help_index_sql)
-         index_labels = []
-         for row in rows:
-             # row[1] specifies whether the index is primary ('P') or secondary.
-             if row[1].rstrip() == 'P':
-                 # row[2] is a string of comma-separated column names that form the primary index.
-                 if "," in row[2]:
-                     index_cols = row[2].split(',')
-                 else:
-                     index_cols = [row[2]]
-                 for index_col in index_cols:
-                     # Since the TD_TIMEBUCKET column in PTI tables is not functionally available, it can be
-                     # ignored in the index information as well (else a warning is generated by SQLAlchemy).
-                     # row[12] corresponds to the 'Timebucket' column in the results of the HELP INDEX command,
-                     # which is available only when the version supports PTI tables.
-                     if index_col == PTITableConstants.TD_TIMEBUCKET.value and len(row) > 12 and row[12] is not None:
-                         continue
-                     else:
-                         index_labels.append(index_col)
-
-         if len(index_labels) > 0:
-             return index_labels
-         else:
-             return None
-
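
The parsing boils down to: keep rows flagged 'P', split the comma-separated column list, and skip the TD_TIMEBUCKET column. A self-contained sketch over rows shaped loosely like the HELP INDEX result:

    # Hedged sketch: rows are (index_name, kind, columns_csv) tuples here,
    # a simplification of the real HELP INDEX result set.
    def primary_index(rows, skip={"TD_TIMEBUCKET"}):
        labels = []
        for _, kind, columns_csv in rows:
            if kind.rstrip() == "P":
                labels.extend(c for c in columns_csv.split(",") if c not in skip)
        return labels or None

    primary_index([("idx", "P", "id,ts"), ("idx2", "S", "name")])  # ['id', 'ts']
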
-     @staticmethod
-     def __validate_sort_type_raise_exception(sort_col_type):
-         """
-         Function to raise a TeradataMlException for errors encountered due to an invalid/incorrect
-         "sort_col_type" in the "_validate_sort_col_type" function.
-
-         PARAMETERS:
-             sort_col_type: The sort column type.
-
-         RETURNS:
-             None
-
-         RAISES:
-             TeradataMlException
-
-         EXAMPLES:
-             df_utils.__validate_sort_type_raise_exception(PythonTypes.PY_STRING_TYPE.value)
-         """
-         msg = Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE).format(sort_col_type)
-         raise TeradataMlException(msg, MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE)
-
-     @staticmethod
-     def _validate_sort_col_type(sort_col_type, sort_col_values):
-         """
-         Validates a list of sort column values against the sort column type.
-
-         PARAMETERS:
-             sort_col_type - The sort column type.
-             sort_col_values - A single value or list-like values.
-
-         RETURNS:
-             None
-
-         RAISES:
-             TeradataMlException
-
-         EXAMPLES:
-             df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, ["Jan", "Feb"])
-             df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, "Jan")
-             df_utils._validate_sort_col_type(PythonTypes.PY_INT_TYPE.value, [1, 2])
-         """
-         if isinstance(sort_col_values, list):
-             if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
-                 if not all(isinstance(i, str) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
-                 if not all(isinstance(i, float) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
-                 if not all(isinstance(i, Decimal) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
-                 if not all(isinstance(i, datetime) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
-                 if not all(isinstance(i, time) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
-                 if not all(isinstance(i, date) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
-                 if not all(isinstance(i, bytes) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             else:  # numeric type
-                 if not all(isinstance(i, numbers.Integral) for i in sort_col_values):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-         elif isinstance(sort_col_values, (tuple, dict)):
-             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
-                                       MessageCodes.TDMLDF_DROP_ARGS)
-         else:
-             if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
-                 if not isinstance(sort_col_values, str):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
-                 if not isinstance(sort_col_values, float):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
-                 if not isinstance(sort_col_values, Decimal):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
-                 if not isinstance(sort_col_values, datetime):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
-                 if not isinstance(sort_col_values, time):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
-                 if not isinstance(sort_col_values, date):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
-                 if not isinstance(sort_col_values, bytes):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-             else:  # numeric type
-                 if not isinstance(sort_col_values, numbers.Integral):
-                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
-
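
The chain of isinstance branches above could equally be driven by a lookup table from type name to Python type. A sketch of that alternative; the PY_*_TYPE string values here are assumed for illustration, not copied from the library:

    import numbers
    from datetime import datetime, date, time
    from decimal import Decimal

    # Hedged sketch: table-driven version of the same validation.
    _SORT_TYPES = {"str": str, "float": float, "decimal.Decimal": Decimal,
                   "datetime.datetime": datetime, "datetime.time": time,
                   "datetime.date": date, "bytes": bytes, "int": numbers.Integral}

    def validate_sort_values(type_name, values):
        expected = _SORT_TYPES.get(type_name, numbers.Integral)
        values = values if isinstance(values, list) else [values]
        if not all(isinstance(v, expected) for v in values):
            raise TypeError("Values do not match sort column type %s" % type_name)

    validate_sort_values("str", ["Jan", "Feb"])
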
-     @staticmethod
-     def _get_required_columns_types_from_metaexpr(metaexpr, col_list=None):
-         """
-         Retrieves column names and types from a meta expression. To get types for only
-         some columns, pass those columns in the 'col_list' argument.
-
-         PARAMETERS:
-             metaexpr - Meta expression from which columns and types are to be retrieved.
-             col_list - Column list for which to get the types.
-
-         RETURNS:
-             Dictionary with the column name as key and its datatype as value.
-
-         EXAMPLES:
-             df = DataFrame.from_table('mytab')
-             metadata = DataFrameUtils._get_required_columns_types_from_metaexpr(df._metaexpr)
-         """
-
-         if isinstance(col_list, str):
-             col_list = [col_list]
-
-         if col_list is not None and not isinstance(col_list, list):
-             return None
-
-         meta_cols = metaexpr.t.c
-         meta_columns = [c.name for c in meta_cols]
-         col_names = []
-         col_types = []
-
-         # When the column list to retrieve is not provided, return metadata for all columns.
-         if col_list is None:
-             for col_name in meta_columns:
-                 col_names.append(meta_cols[col_name].name)
-                 col_types.append(meta_cols[col_name].type)
-
-         # Otherwise, return metadata for only the requested columns.
-         else:
-             for col_name in col_list:
-                 if DataFrameUtils._check_column_exists(col_name, meta_columns):
-                     # _metaexpr saves columns without quotes, so unquote.
-                     unquoted_col_name = col_name.replace('"', "")
-                     col_names.append(meta_cols[unquoted_col_name].name)
-                     col_types.append(meta_cols[unquoted_col_name].type)
-
-         return OrderedDict(zip(col_names, col_types))
-
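
The return value is an ordered name-to-type mapping, so column order is preserved. A toy illustration of the contract, with plain strings standing in for teradatasqlalchemy types:

    from collections import OrderedDict

    # Hedged sketch of the return contract: column order is preserved.
    cols = ["id", "name", "amount"]
    types = ["INTEGER", "VARCHAR", "FLOAT"]
    OrderedDict(zip(cols, types))   # OrderedDict([('id', 'INTEGER'), ...])
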
-     @staticmethod
-     def _check_column_exists(column_name, df_columns):
-         """
-         Checks whether the provided column is present in the given list of columns.
-         Note:
-             It is the calling function's responsibility to send the column and the column list
-             in the proper case. By default the lookup is case-sensitive. For a case-insensitive
-             lookup, send the column_name and the df_columns list in lower case.
-
-         PARAMETERS:
-             column_name - Column name to check for.
-             df_columns - List of columns in which to check.
-
-         RETURNS:
-             True if the column exists, otherwise False.
-
-         EXAMPLES:
-             df = DataFrame.from_table('mytab')
-             column_exists = DataFrameUtils._check_column_exists("col1", df.columns)
-         """
-         unquoted_df_columns = [column.replace('"', "") for column in df_columns]
-         if column_name.replace('"', "") in unquoted_df_columns:
-             return True
-         else:
-             return False
-
-     @staticmethod
-     def _validate_agg_function(func, col_names):
-         """
-         Internal function to validate column names against the actual
-         column names passed as a parameter, and aggregate operations
-         against the valid aggregate operations.
-
-         PARAMETERS:
-             func - (Required) Specifies the function(s) to be
-                    applied on teradataml DataFrame columns.
-                    Acceptable formats for function(s) are string,
-                    dictionary or list of strings/functions.
-                    Accepted combinations are:
-                        1. String function name
-                        2. List of string functions
-                        3. Dictionary of column names -> string function
-                           (or list of string functions)
-             col_names - List. Names of the columns in the DataFrame.
-
-         RETURNS:
-             operations - dict of columns -> aggregate operations.
-                          Unified dictionary, similar to func, even for string and
-                          list of strings or functions.
-
-         RAISES:
-             1. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
-                operation(s) received in parameter 'func' is/are
-                invalid.
-
-                Possible Value:
-                Invalid aggregate operation(s): minimum, counter.
-                Valid aggregate operation(s): count, max, mean, min,
-                std, sum.
-
-             2. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
-                specified in 'func' is not present in the dataframe.
-
-                Possible Value:
-                Invalid column(s) given in parameter func: col1.
-                Valid column(s): A, B, C, D.
-
-         EXAMPLES:
-             Let the dataframe contain 2 columns, col1 and col2.
-
-             VALID EXAMPLES:
-                 1. operations = DataFrameUtils._validate_agg_function(
-                        'mean', ['col1', 'col2'])
-
-                 2. operations = DataFrameUtils._validate_agg_function(
-                        ['mean', 'min'], ['col1', 'col2'])
-
-                 3. operations = DataFrameUtils._validate_agg_function(
-                        {'col1': ['mean', 'min'], 'col2': 'count'},
-                        ['col1', 'col2'])
-
-             INVALID EXAMPLES:
-                 1. operations = DataFrameUtils._validate_agg_function(
-                        'counter', ['col1', 'col2'])
-
-                 2. operations = DataFrameUtils._validate_agg_function(
-                        {'col1': ['mean', 'min'], 'col55': 'count'},
-                        ['col1', 'col2'])
-         """
-         operations = OrderedDict()
-
-         valid_aggregate_operations = UtilFuncs._get_valid_aggregate_operations()
-
-         if isinstance(func, str):
-             for column in col_names:
-                 operations[column] = [func]
-         elif isinstance(func, list):
-             for column in col_names:
-                 operations[column] = func
-         else:
-             for column in func:
-                 if isinstance(func[column], str):
-                     func[column] = [func[column]]  # Converts a string inside the dict to a list.
-             operations = func
-
-         given_columns = operations.keys()
-         invalid_columns = []
-         all_operations = []
-         for col in given_columns:
-             all_operations.extend(operations[col])
-             if col not in col_names:
-                 invalid_columns.append(col)
-         if len(invalid_columns) > 0:  # If any of the columns specified is not present in the dataframe.
-             col_names.sort()
-             invalid_columns.sort()
-             msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN). \
-                 format(", ".join(invalid_columns), 'func', ", ".join(col_names))
-             raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN)
-
-         all_operations = list(set(all_operations))
-         invalid_aggregates = []
-         for operation in all_operations:
-             if operation not in valid_aggregate_operations \
-                     and operation not in UtilFuncs._get_valid_time_series_aggregate_operations():
-                 invalid_aggregates.append(operation)
-         if len(invalid_aggregates) > 0:  # If any of the aggregate operations specified is not valid.
-             # To raise the error message, add the other time series aggregate operations that can be
-             # used with the DataFrame.agg() method.
-             valid_aggregate_operations = valid_aggregate_operations + ['first', 'last', 'mode']
-             valid_aggregate_operations.sort()
-             invalid_aggregates.sort()
-             msg = Messages.get_message(MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION). \
-                 format(", ".join(invalid_aggregates), ", ".join(valid_aggregate_operations))
-             raise TeradataMlException(msg, MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION)
-
-         return operations
-
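
Whatever shape func arrives in, it leaves as a dict of column -> list of operations. A compact sketch of just that normalization step:

    from collections import OrderedDict

    # Hedged sketch: normalize str / list / dict 'func' into column -> [ops].
    def normalize_func(func, col_names):
        if isinstance(func, str):
            return OrderedDict((c, [func]) for c in col_names)
        if isinstance(func, list):
            return OrderedDict((c, list(func)) for c in col_names)
        return OrderedDict((c, [ops] if isinstance(ops, str) else ops)
                           for c, ops in func.items())

    normalize_func({"col1": "min", "col2": ["min", "sum"]}, ["col1", "col2"])
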
-     @staticmethod
-     def _generate_aggregate_column_expression(df, column, operation, describe_op, tdp, **kwargs):
-         """
-         Function generates the aggregate column expression for the provided column
-         and aggregate function.
-
-         PARAMETERS:
-             df:
-                 Required Argument.
-                 Specifies the teradataml DataFrame which is to be used to get the
-                 desired aggregate column expression.
-                 Types: teradataml DataFrame
-
-             column:
-                 Required Argument.
-                 Specifies the column name for which the desired aggregate operation is
-                 to be used.
-                 Types: str
-
-             operation:
-                 Required Argument.
-                 Specifies the aggregate operation.
-                 Types: str
-
-             describe_op:
-                 Required Argument.
-                 Specifies a boolean flag that decides whether the aggregate
-                 operation is being performed for DataFrame.describe() or not.
-                 Types: bool
-
-             tdp:
-                 Required Argument.
-                 Specifies a TeradataIdentifierPreparer object. It is required for
-                 quoting.
-                 Types: TeradataIdentifierPreparer
-
-             kwargs:
-                 Specifies miscellaneous keyword arguments that can be passed to
-                 aggregate functions.
-
-         RAISES:
-             AttributeError - In case the ColumnExpression does not have the desired
-             aggregate function implemented.
-
-         RETURNS:
-             A boolean stating whether the column is supported or not, the new column name,
-             the new column type, a string representing the column aggregate expression, and
-             invalid column information in case the column has an unsupported type for an
-             aggregate operation.
-
-         EXAMPLES:
-             column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
-                 DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
-                                                                      describe_op=describe_op, percentile=percentile,
-                                                                      tdp=tdp, **kwargs)
-         """
-         try:
-             key_to_process = ""
-             # Quote column names that clash with Teradata reserved keywords.
-             if "sort_columns" in kwargs:
-                 key_to_process = "sort_columns"
-             elif "sort_column" in kwargs:
-                 key_to_process = "sort_column"
-
-             if key_to_process:
-                 quoted_columns = UtilFuncs._process_for_teradata_keyword(kwargs[key_to_process])
-                 kwargs[key_to_process] = quoted_columns
-
-             func_expression = getattr(df[column], operation)(describe_op=describe_op, **kwargs)
-             new_column_name = column if describe_op else "{1}_{0}".format(column, operation)
-             # column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str
-             return True, new_column_name, NUMBER() if describe_op else func_expression.type, \
-                 func_expression.compile_label(new_column_name), None
-         except AttributeError:
-             # Being here means the provided operation is invalid and is not supported.
-             # This is for internal purposes only.
-             # Validation of operations for "agg" should be done in "agg" only.
-             raise RuntimeError("Invalid aggregate function: {}".format(operation))
-         except RuntimeError:
-             # Being here means the column does not support the provided operation.
-             # We will ignore this and add the column to the invalid column list:
-             # invalid_columns[operation].append("({0} - {1})".format(column, column_type)) OR
-             # we will raise a generic message, mentioning that the DF does not have any column
-             # with a type supported to perform the operation.
-             if describe_op:
-                 return True, tdp.quote(column), NUMBER(), 'null as {}'.format(tdp.quote(column)), None
-             else:
-                 return False, None, None, None, "({0} - {1})".format(column, df[column].type)
-         except Exception:
-             raise
-
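
The dispatch relies on getattr: the operation name is looked up as a method on the column expression, so an unknown name surfaces as an AttributeError. A stripped-down sketch of the pattern, with a toy column class standing in for the real ColumnExpression:

    # Hedged sketch of getattr-based dispatch onto per-column aggregate methods.
    class Col:
        def __init__(self, name):
            self.name = name
        def min(self):
            return "min({0}) as min_{0}".format(self.name)

    def aggregate(col, operation):
        try:
            return getattr(col, operation)()
        except AttributeError:
            raise RuntimeError("Invalid aggregate function: {}".format(operation))

    aggregate(Col("col1"), "min")   # 'min(col1) as min_col1'
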
- @staticmethod
756
- def _construct_sql_expression_for_aggregations(df, column_names, column_types, func, percentile=.5,
757
- describe_op=False, **kwargs):
758
- """
759
- Internal function to create and return the sql expression
760
- corresponding to given operation, given column_names and
761
- column_types.
762
-
763
- Column_types are used to check whether all the datatypes are
764
- valid types for given operation and throw exception if they
765
- are not.
766
-
767
- PARAMETERS :
768
- df:
769
- Required Argument.
770
- Specifies teradataml DataFrame which is to be used to get the desired
771
- aggregate column expression.
772
- Types: teradataml DataFrame
773
-
774
- column_names:
775
- Required Argument.
776
- Specifies the column names for which desired aggregate operation is
777
- to be executed.
778
- Types: List of strings
779
-
780
- column_types:
781
- Required Argument.
782
- Specifies the respective column types for column names.
783
- Types: List of teradatasqlalchemy types
784
-
785
- func:
786
- Required Argument.
787
- Specifies the aggregate function(s) to be applied on teradataml
788
- DataFrame columns.
789
- Types: string, dictionary or list of strings/functions.
790
- Accepted combinations are:
791
- 1. String function name
792
- 2. List of functions
793
- 3. Dictionary containing column name as key and aggregate
794
- function name (string or list of strings) as value
795
-
796
- percentile:
797
- Optional Argument.
798
- Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
799
- The default is .5, which returns the 50th percentiles.
800
- Types: float
801
-
802
- describe_op:
803
- Optional Argument.
804
- Specifies a boolean flag, that will decide whether the aggregate operation being
805
- performed is for DataFrame.describe() or not.
806
- Types: bool
807
-
808
- kwargs:
809
- Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
810
-
811
- RETURNS :
812
- a)sql expression as
813
- 1. 'min(col1) as min_col1, min(col2) as min_col2' if
814
- col1 and col2 are the columns in Dataframe and
815
- operation is 'min'
816
- 2. 'max(col1) as max_col1, max(col2) as max_col2' if
817
- col1 and col2 are the columns in Dataframe and
818
- operation is 'max'
819
- 3. 'min(col1) as min_col1, stddev_samp(col2) as
820
- std_col2' if col1, col2 are the columns in
821
- Dataframe and operations are min, std.
822
- etc...
823
- b) new columns' names (eg min_col1, min_col2 ...)
824
- c) new columns' types
825
- RAISES:
826
- TeradataMLException
827
- 1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
828
- aggregate operations do not support specified columns.
829
-
830
- Possible Value :
831
- No results. Below is/are the error message(s):
832
- All selected columns [(col1 - VARCHAR)] is/are
833
- unsupported for 'sum' operation.
834
-
835
- 2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
836
- operation(s) received in parameter 'func' is/are
837
- invalid.
838
-
839
- Possible Value :
840
- Invalid aggregate operation(s): minimum, counter.
841
- Valid aggregate operation(s): count, max, mean, min,
842
- std, sum.
843
-
844
- 3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
845
- specified in func is not present in the dataframe.
846
-
847
- Possible Value :
848
- Invalid column(s) given in parameter func: col1.
849
- Valid column(s) : A, B, C, D.
850
-
851
- EXAMPLES:
852
- col_names, col_types = \
853
- df_utils._get_column_names_and_types_from_metaexpr(
854
- self._metaexpr)
855
- expr, new_col_names, new_col_types = \
856
- df_utils._construct_sql_expression_for_aggregations(
857
- col_names, col_types, 'min')
858
-
859
- expr1, new_col_names1, new_col_types1 = \
860
- df_utils._construct_sql_expression_for_aggregations(
861
- col_names, col_types, ['min', 'sum'])
862
-
863
- expr2, new_col_names2, new_col_types2 = \
864
- df_utils._construct_sql_expression_for_aggregations(
865
- col_names, col_types, {'col1 : ['min', 'sum'],
866
- 'col2' : 'mean'})
867
-
868
- """
869
-
870
- # eg of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
871
- # FLOAT(precision=0)]
872
-
873
- # eg of types of each column are <class 'teradatasqlalchemy.types.VARCHAR'>,
874
- # <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
875
- # <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc..
876
-
877
- # If the function is a time series aggregate, we process the aggregation differently.
878
- if not isinstance(func, str):
879
- # If func is not an instance of string, the call is coming
880
- # from DataFrame.agg() and is made to process multiple functions.
881
- # We process this differently, as we need to map and serialize the
882
- # column names that the aggregate functions operate on.
883
- # If we have just one function to be executed on the complete DataFrame, then we don't need
884
- # this extra processing. Also, if the call is from DataFrame.agg(), the time series aggregate
885
- # check is not required, as the special Time Series aggregate functions cannot be used in
886
- # DataFrame.agg().
887
- return DataFrameUtils._construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types,
888
- func, percentile, describe_op,
889
- **kwargs)
890
-
891
- as_time_series_aggregate = False
892
- if "as_time_series_aggregate" in kwargs.keys():
893
- as_time_series_aggregate = kwargs["as_time_series_aggregate"]
894
-
895
- if as_time_series_aggregate and func in ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top',
896
- 'top with ties']:
897
- return DataFrameUtils._construct_sql_expression_for_time_series_aggregations(df, column_names, column_types,
898
- func, **kwargs)
899
-
900
- tdp = preparer(td_dialect)
901
-
902
- # This variable is used to decide whether DataFrame has all columns unsupported
903
- # for the provided operations.
904
- all_unsupported_columns = True
905
- valid_columns = []
906
- invalid_columns = []
907
- new_column_names = []
908
- new_column_types = []
909
- for column in column_names:
910
- column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
911
- DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
912
- describe_op=describe_op, percentile=percentile,
913
- tdp=tdp, **kwargs)
914
-
915
- if column_supported:
916
- all_unsupported_columns = False
917
- new_column_names.append(new_column_name)
918
- new_column_types.append(new_column_type)
919
- valid_columns.append(column_aggr_expr)
920
- else:
921
- invalid_columns.append("({0} - {1})".format(column, df[column].type))
922
-
923
- if all_unsupported_columns:
924
-
925
- error_msgs = []
926
- invalid_columns.sort() # Helps in catching the columns in lexicographic order
927
- error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(", ".join(invalid_columns),
928
- func)
929
- error_msgs.append(error)
930
-
931
- if len(valid_columns) == 0: # No supported columns in the given list of columns
932
- raise TeradataMlException(Messages.get_message(
933
- MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
934
- MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
935
-
936
- # Quote column names that match Teradata reserved keywords.
937
- quote_column_name = [UtilFuncs._process_for_teradata_keyword(col) for col in column_names]
938
-
939
- # Actual columns should be retained if "drop_columns" is set to False.
940
- if kwargs.get("drop_columns") is False:
941
- valid_columns = quote_column_name + valid_columns
942
- new_column_names = column_names + new_column_names
943
- new_column_types = column_types + new_column_types
944
-
945
- aggregate_expr = ", ".join(valid_columns)
946
- return aggregate_expr, new_column_names, new_column_types
947
-
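For orientation, here is a minimal sketch of driving this builder for a single function. It assumes a connected teradataml session, an existing table named `sales`, and that `DataFrameUtils` is importable from `teradataml.dataframe.dataframe_utils`; the expression shape in the comment follows the docstring above, not verified output:

```python
# Hedged sketch: build 'min' aggregates over every supported column.
from teradataml.dataframe.dataframe import DataFrame
from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils

df = DataFrame.from_table("sales")  # assumed table
col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)

expr, new_names, new_types = df_utils._construct_sql_expression_for_aggregations(
    df, col_names, col_types, "min")

# Per the docstring, expr resembles: min(col1) as min_col1, min(col2) as min_col2
query = "SELECT {} FROM {}".format(expr, df._table_name)
```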
948
- @staticmethod
949
- def _construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types, func, percentile=.5,
950
- describe_op=False, **kwargs):
951
- """
952
- Internal function to create and return the sql expression
953
- corresponding to given operation, given column_names and
954
- column_types.
955
-
956
- Column_types are used to check whether all the datatypes are
957
- valid types for given operation and throw exception if they
958
- are not.
959
-
960
- PARAMETERS:
961
- df:
962
- Required Argument.
963
- Specifies teradataml DataFrame which is to be used to get the desired
964
- aggregate column expression.
965
- Types: teradataml DataFrame
966
-
967
- column_names:
968
- Required Argument.
969
- Specifies the column names for which desired aggregate operation is
970
- to be executed.
971
- Types: List of strings
972
-
973
- column_types:
974
- Required Argument.
975
- Specifies the respective column types for column names.
976
- Types: List of teradatasqlalchemy types
977
-
978
- func:
979
- Required Argument.
980
- Specifies the aggregate function(s) to be applied on teradataml
981
- DataFrame columns.
982
- Types: string, dictionary or list of strings/functions.
983
- Accepted combinations are:
984
- 1. String function name
985
- 2. List of functions
986
- 3. Dictionary containing column name as key and aggregate
987
- function name (string or list of strings) as value
988
-
989
- percentile:
990
- Optional Argument.
991
- Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
992
- The default is .5, which returns the 50th percentiles.
993
- Types: float
994
-
995
- describe_op:
996
- Optional Argument.
997
- Specifies a boolean flag that decides whether the aggregate operation being
998
- performed is for DataFrame.describe() or not.
999
- Types: bool
1000
-
1001
- kwargs:
1002
- Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
1003
-
1004
- RETURNS:
1005
- a) SQL expression, such as:
1006
- 1. 'min(col1) as min_col1, min(col2) as min_col2' if
1007
- col1 and col2 are the columns in the DataFrame and
1008
- the operation is 'min'
1009
- 2. 'max(col1) as max_col1, max(col2) as max_col2' if
1010
- col1 and col2 are the columns in the DataFrame and
1011
- the operation is 'max'
1012
- 3. 'min(col1) as min_col1, stddev_samp(col2) as
1013
- std_col2' if col1 and col2 are the columns in the
1014
- DataFrame and the operations are min and std.
1015
- etc...
1016
- b) new columns' names (e.g., min_col1, min_col2, ...)
1017
- c) new columns' types
1018
- RAISES:
1019
- TeradataMLException
1020
- 1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
1021
- aggregate operations do not support specified columns.
1022
-
1023
- Possible Value :
1024
- No results. Below is/are the error message(s):
1025
- All selected columns [(col1 - VARCHAR)] is/are
1026
- unsupported for 'sum' operation.
1027
-
1028
- 2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
1029
- operation(s) received in parameter 'func' is/are
1030
- invalid.
1031
-
1032
- Possible Value :
1033
- Invalid aggregate operation(s): minimum, counter.
1034
- Valid aggregate operation(s): count, max, mean, min,
1035
- std, sum.
1036
-
1037
- 3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
1038
- specified in func is not present in the dataframe.
1039
-
1040
- Possible Value :
1041
- Invalid column(s) given in parameter func: col1.
1042
- Valid column(s) : A, B, C, D.
1043
-
1044
- EXAMPLES:
1045
- col_names, col_types = \
1046
- df_utils._get_column_names_and_types_from_metaexpr(
1047
- self._metaexpr)
1048
- expr, new_col_names, new_col_types = \
1049
- df_utils._construct_sql_expression_for_aggregations_for_agg(
1050
- df, col_names, col_types, 'min')
1051
-
1052
- expr1, new_col_names1, new_col_types1 = \
1053
- df_utils._construct_sql_expression_for_aggregations_for_agg(
1054
- df, col_names, col_types, ['min', 'sum'])
1055
-
1056
- expr2, new_col_names2, new_col_types2 = \
1057
- df_utils._construct_sql_expression_for_aggregations_for_agg(
1058
- df, col_names, col_types, {'col1' : ['min', 'sum'],
1059
- 'col2' : 'mean'})
1060
-
1061
- """
1062
- # If the function is a time series aggregate, we process the aggregation differently.
1063
- # Also, one is not supposed to pass the below time series aggregates to DataFrame.agg():
1064
- # ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top', 'top with ties']
1065
- # Thus, no extra processing is required for time series aggregates over here.
1066
-
1067
- # 'operations' contains dict of columns -> list of aggregate operations
1068
- operations = DataFrameUtils._validate_agg_function(func, column_names)
1069
-
1070
- all_valid_columns = []
1071
- all_invalid_columns = {}
1072
- all_new_column_names = []
1073
- all_new_column_types = []
1074
-
1075
- # For each column, the value is True if there is at least one valid operation (operation on valid datatype)
1076
- column_supported = {}
1077
- tdp = preparer(td_dialect)
1078
- for column in operations:
1079
- column_supported[column] = False
1080
- valid_columns = []
1081
- invalid_columns = {}
1082
- new_column_names = []
1083
- new_column_types = []
1084
- for operation in operations[column]:
1085
- is_colop_supported, new_col, new_coltype, column_aggr_expr, invalid_column_info = \
1086
- DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=operation,
1087
- describe_op=describe_op, percentile=percentile,
1088
- tdp=tdp, **kwargs)
1089
- if is_colop_supported:
1090
- column_supported[column] = is_colop_supported
1091
- new_column_names.append(new_col)
1092
- new_column_types.append(new_coltype)
1093
- valid_columns.append(column_aggr_expr)
1094
- else:
1095
- if operation in invalid_columns:
1096
- invalid_columns[operation].append(invalid_column_info)
1097
- else:
1098
- invalid_columns[operation] = [invalid_column_info]
1099
-
1100
- all_valid_columns.extend(valid_columns)
1101
- all_new_column_names.extend(new_column_names)
1102
- all_new_column_types.extend(new_column_types)
1103
-
1104
- for operation in invalid_columns:
1105
- if operation in all_invalid_columns:
1106
- all_invalid_columns[operation].extend(invalid_columns[operation])
1107
- else:
1108
- all_invalid_columns[operation] = invalid_columns[operation]
1109
-
1110
- unsupported_columns = [col for col in column_supported if not column_supported[col]]
1111
- unsupported_columns.sort() # helps in catching the columns in lexicographic order
1112
-
1113
- error_msgs = []
1114
- for operation in sorted(all_invalid_columns):
1115
- all_invalid_columns[operation].sort() # helps in catching the columns in
1116
- # lexicographic order
1117
- error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(
1118
- ", ".join(all_invalid_columns[operation]), operation)
1119
- error_msgs.append(error)
1120
-
1121
- if not all(column_supported[oper] for oper in column_supported):
1122
- new_msg = MessageCodes.TDMLDF_AGGREGATE_AGG_DICT_ERR.value.format(", ".join(unsupported_columns))
1123
- error_msgs.append(new_msg)
1124
- msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs))
1125
- raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
1126
-
1127
- elif len(all_valid_columns) == 0: # No supported columns in the given list of columns
1128
- raise TeradataMlException(Messages.get_message(
1129
- MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
1130
- MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
1131
-
1132
- aggregate_expr = ", ".join(all_valid_columns)
1133
- return aggregate_expr, all_new_column_names, all_new_column_types
1134
-
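Continuing the names from the previous sketch, the dictionary form this variant unpacks might be exercised as follows (column names are illustrative):

```python
# Hedged sketch: per-column aggregate functions, the form DataFrame.agg() accepts.
func = {"Feb": ["min", "sum"], "accounts": "count"}

expr, new_names, new_types = df_utils._construct_sql_expression_for_aggregations_for_agg(
    df, col_names, col_types, func)

# _validate_agg_function() first normalizes func into an OrderedDict of
# column -> list of operations; expressions are then generated per column.
```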
1135
- @staticmethod
1136
- def _construct_sql_expression_for_time_series_aggregations(df, column_names, column_types, func, **kwargs):
1137
- """
1138
- Internal function to create and return the sql expression
1139
- corresponding to given time series function, given column_names and
1140
- column_types.
1141
-
1142
- Column_types are used to check whether all the datatypes are
1143
- valid types for given operation and throw exception if they
1144
- are not.
1145
-
1146
- NOTE:
1147
- This function should be used only for time series aggregates.
1148
-
1149
- PARAMETERS:
1150
- df:
1151
- Required Argument.
1152
- Specifies teradataml DataFrame which is to be used to get the desired
1153
- aggregate column expression.
1154
- Types: teradataml DataFrame
1155
-
1156
- column_names:
1157
- Required Argument.
1158
- Specifies the column names for which desired aggregate operation is
1159
- to be executed.
1160
- Types: List of strings
1161
-
1162
- column_types:
1163
- Required Argument.
1164
- Specifies the respective column types for column names.
1165
- Types: List of teradatasqlalchemy types
1166
-
1167
- func:
1168
- Required Argument.
1169
- Specifies the aggregate function(s) to be applied on teradataml
1170
- DataFrame columns. For Time Series aggregates it is usually a string.
1171
- Types: str
1172
-
1173
- kwargs:
1174
- Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
1175
-
1176
- RETURNS:
1177
- a) SQL expression, such as:
1178
- 1. 'bottom(2, "col1") as "bottom2col1"' if
1179
- col1 is a column in the DataFrame and
1180
- the operation is 'bottom'
1181
- etc...
1182
- b) new columns' names (e.g., bottom2col1, ...)
1183
- c) new columns' types
1184
-
1185
- RAISES:
1186
- None.
1187
-
1188
- EXAMPLES:
1189
- colname_to_numvalues = {"col1" : 2, "col2": 3}
1190
- kwargs = {"colname_to_numvalues": colname_to_numvalues}
1191
- aggregate_expr, column_names, column_types = \
1192
- df_utils._construct_sql_expression_for_time_series_aggregations(df, column_names, column_types,
1193
- func, **kwargs)
1194
-
1195
- """
1196
-
1197
- # eg of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
1198
- # FLOAT(precision=0)]
1199
-
1200
- # eg of types of each column are <class 'teradatasqlalchemy.types.VARCHAR'>,
1201
- # <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
1202
- # <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc..
1203
-
1204
- col_names_and_types = dict(zip(column_names, column_types))
1205
- tdp = preparer(td_dialect)
1206
-
1207
- select_columns = []
1208
- new_column_names = []
1209
- new_column_types = []
1210
- if func in ["bottom", "bottom with ties", "top", "top with ties"]:
1211
- # Processing for bottom and top.
1212
- # Function name to be used in column aliasing.
1213
- column_alias_func = func.replace(" ", "_")
1214
- bottom_col_val = kwargs["colname_to_numvalues"]
1215
- for column in sorted(list(bottom_col_val.keys())):
1216
- new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
1217
- quoted_parent_column_name = tdp.quote("{0}".format(column))
1218
- quoted_new_column_name = tdp.quote(new_col_name)
1219
- select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
1220
- quoted_parent_column_name, quoted_new_column_name))
1221
- new_column_names.append(new_col_name)
1222
- new_column_types.append(col_names_and_types[column])
1223
-
1224
- if func == "delta_t":
1225
- # Argument processing for DELTA-T
1226
- new_column_names.append("delta_t_td_timecode")
1227
- quoted_new_column_name = tdp.quote(new_column_names[0])
1228
- new_column_types.append(PERIOD_TIMESTAMP)
1229
- select_columns.append("{0}((WHERE {1}), (WHERE {2})) as {3}".format(func, kwargs["start_condition"],
1230
- kwargs["end_condition"],
1231
- quoted_new_column_name))
1232
-
1233
- if func == 'mad':
1234
- # Processing for Median Absolute Deviation.
1235
- # Function name to be used in column aliasing.
1236
- column_alias_func = func.replace(" ", "_")
1237
- bottom_col_val = kwargs["colname_to_numvalues"]
1238
- for column in sorted(list(bottom_col_val.keys())):
1239
- new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
1240
- quoted_parent_column_name = tdp.quote("{0}".format(column))
1241
- quoted_new_column_name = tdp.quote(new_col_name)
1242
- select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
1243
- quoted_parent_column_name, quoted_new_column_name))
1244
- new_column_names.append(new_col_name)
1245
- if type(col_names_and_types[column]) in [DECIMAL, NUMBER]:
1246
- # If column types is DECIMAL or NUMBER, then output column types should also be same.
1247
- # Otherwise, it is FLOAT.
1248
- new_column_types.append(col_names_and_types[column])
1249
- else:
1250
- new_column_types.append(FLOAT())
1251
-
1252
- if "default_constant_for_columns" in kwargs.keys():
1253
- column_names = kwargs["default_constant_for_columns"]
1254
- column_types = [col_names_and_types[column] for column in column_names]
1255
- if len(column_names) > 0:
1256
- aggregate_expr, all_new_column_names, all_new_column_types = \
1257
- DataFrameUtils._construct_sql_expression_for_aggregations(df=df, column_names=column_names,
1258
- column_types=column_types, func=func,
1259
- )
1260
- aggregate_expr_default_column_list = [col.strip() for col in aggregate_expr.split(",")]
1261
- select_columns = select_columns + aggregate_expr_default_column_list
1262
- new_column_names = new_column_names + all_new_column_names
1263
- new_column_types = new_column_types + all_new_column_types
1264
-
1265
-
1266
- aggregate_expr = ", ".join(select_columns)
1267
- return aggregate_expr, new_column_names, new_column_types
1268
-
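A sketch of the 'bottom' path, assuming the caller packs the per-column n-values into kwargs under "colname_to_numvalues" as the EXAMPLES block shows (column names are illustrative):

```python
# Hedged sketch: time series 'bottom' aggregation over two columns.
from teradatasqlalchemy.types import FLOAT

kwargs = {"colname_to_numvalues": {"pressure": 3, "temperature": 2}}
expr, names, types = df_utils._construct_sql_expression_for_time_series_aggregations(
    df, ["temperature", "pressure"], [FLOAT(), FLOAT()], "bottom", **kwargs)

# Columns are processed in sorted order, so expr should resemble:
# bottom(3, "pressure") as "bottom3pressure", bottom(2, "temperature") as "bottom2temperature"
```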
1269
- @staticmethod
1270
- def _construct_describe_query(df, metaexpr, percentiles, function_label, groupby_column_list=None,
1271
- include=None, is_time_series_aggregate=False, verbose=False, distinct=False,
1272
- statistics=None, **kwargs):
1273
- """
1274
- Internal function to create the sql query for describe().
1275
-
1276
- PARAMETERS:
1277
- df:
1278
- Required Argument.
1279
- Specifies the teradataml DataFrame for which statistics are collected.
1280
- Types: teradataml DataFrame
1281
-
1282
- metaexpr:
1283
- Required Argument.
1284
- Specifies the meta expression for the dataframe.
1285
- Types: _MetaExpression
1286
-
1287
- percentiles:
1288
- Required Argument.
1289
- Specifies a list of values between 0 and 1.
1290
- Types: List of floats
1291
-
1292
- function_label:
1293
- Required Argument.
1294
- Specifies a string value used as the label for the aggregate function column.
1295
- Types: str
1296
-
1297
- groupby_column_list:
1298
- Optional Argument.
1299
- Specifies the group by columns for the dataframe.
1300
- Default Values: None.
1301
- Types: str or List of strings (str)
1302
-
1303
- include:
1304
- Optional Argument.
1305
- Specifies a string that must be "all" or None. If "all", then all columns will be included.
1306
- Otherwise, only numeric columns are used for collecting statistics.
1307
- Default Values: None.
1308
- Types: str
1309
-
1310
- is_time_series_aggregate:
1311
- Optional Argument.
1312
- Specifies a flag stating whether the describe operation is a time series aggregate or not.
1313
- Default Values: False.
1314
- Types: bool
1315
-
1316
- verbose:
1317
- Optional Argument.
1318
- Specifies a flag stating whether DESCRIBE VERBOSE option for time series aggregate is to be
1319
- performed or not.
1320
- Default Values: False.
1321
- Types: bool
1322
-
1323
- distinct:
1324
- Optional Argument.
1325
- Specifies a flag that decides whether to consider duplicate rows in calculation or not.
1326
- Default Values: False
1327
- Types: bool
1328
-
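- statistics:
- Optional Argument.
- Specifies the aggregate operations to be used in place of the default
- set of operators. Used only when "include" is None.
- Default Values: None
- Types: list of strings (str)
- 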
1329
- kwargs:
1330
- Optional Arguments.
1331
- Keyword argument for time series aggregate functions.
1332
-
1333
-
1334
- RETURNS:
1335
- A SQL query like:
1336
- select 'count' as "func", cast(count("Feb") as Number) as "Feb", cast(count(accounts) as Number) as accounts from "PYUSER"."salesview"
1337
- union all
1338
- select 'mean' as "func", cast(avg("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
1339
- union all
1340
- select 'std' as "func", cast(stddev_samp("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
1341
- union all
1342
- select 'min' as "func", cast(min("Feb") as Number) as "Feb", cast(min(accounts) as Number) as accounts from "PYUSER"."salesview"
1343
- union all
1344
- select '25%' as "func", percentile_cont(0.25) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
1345
- union all
1346
- select '50%' as "func", percentile_cont(0.5) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
1347
- union all
1348
- select '75%' as "func", percentile_cont(0.75) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
1349
- union all
1350
- select 'max' as "func", cast(max("Feb") as Number) as "Feb", cast(max(accounts) as Number) as accounts from "PYUSER"."salesview"
1351
-
1352
- RAISES:
1353
- TeradataMLException
1354
-
1355
- EXAMPLES:
1356
- agg_query = \
1357
- df_utils._construct_describe_query("self._table_name", self._metaexpr, [.25, .5, .75], "func", self.groupby_column_list)
1358
- agg_query = \
1359
- df_utils._construct_describe_query("self._table_name", self._metaexpr, [.3, .6], "func", self.groupby_column_list, include="all")
1360
-
1361
- """
1362
- table_name = df._table_name
1363
- operators = ["count", "mean", "std", "min", "percentile", "max"]
1364
- all_operators = ["count", "unique", "mean", "std", "min", "percentile", "max"]
1365
-
1366
- if is_time_series_aggregate and verbose:
1367
- # Time Series Aggregate Operators for Vantage DESCRIBE function with verbose
1368
- operators = ['max', 'mean', 'median', 'min', 'mode', "percentile", 'std']
1369
- elif is_time_series_aggregate and not verbose:
1370
- # Time Series Aggregate Operators for Vantage DESCRIBE function.
1371
- operators = ['max', 'mean', 'min', 'std']
1372
-
1373
- col_names = []
1374
- col_types = []
1375
- sel_agg_stmts = []
1376
- tdp = preparer(td_dialect)
1377
- quoted_function_label = tdp.quote(function_label)
1378
-
1379
- if include is not None and include == 'all' and not is_time_series_aggregate:
1380
- operators = all_operators
1381
-
1382
- if include is None and statistics is not None:
1383
- operators = statistics
1384
-
1385
- table_name, sel_groupby, groupby = DataFrameUtils()._process_groupby_clause(table_name, groupby_column_list,
1386
- is_time_series_aggregate, **kwargs)
1387
-
1388
- for col in metaexpr.c:
1389
- if (include is None and type(col.type) in UtilFuncs()._get_numeric_datatypes()) or include == 'all' or statistics is not None:
1390
- if not(groupby is not None and col.name in groupby_column_list):
1391
- col_names.append(col.name)
1392
- col_types.append(col.type)
1393
-
1394
- if len(col_names) == 0:
1395
- raise TeradataMlException(
1396
- Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR,
1397
- "The DataFrame does not contain numeric columns"),
1398
- MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
1399
-
1400
- for op in operators:
1401
- if op == "percentile":
1402
- for p in percentiles:
1403
- agg_expr, new_col_names, new_col_types = \
1404
- DataFrameUtils._construct_sql_expression_for_aggregations(df,
1405
- col_names, col_types, op, percentile=p, describe_op=True, distinct=distinct,
1406
- as_time_series_aggregate=is_time_series_aggregate)
1407
- sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}%' as varchar(6)) as \"{1}\", {2} from {3} ".format(
1408
- int(p*100), quoted_function_label, agg_expr, table_name, sel_groupby))
1409
- else:
1410
- agg_expr, new_col_names, new_col_types = \
1411
- DataFrameUtils._construct_sql_expression_for_aggregations(df,
1412
- col_names, col_types, op, describe_op=True, distinct=distinct,
1413
- as_time_series_aggregate=is_time_series_aggregate)
1414
- sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}' as varchar(6)) as \"{1}\", \n\t{2} \nfrom \n\t{3} ".format(
1415
- op, quoted_function_label, agg_expr, table_name, sel_groupby))
1416
- return " \nunion all\n ".join(sel_agg_stmts)
1417
-
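A sketch of how the assembled describe() query might be executed, assuming a connected session; `_execute_query_and_generate_pandas_df` is the helper this module already imports:

```python
# Hedged sketch: build and run the describe() query with default percentiles.
from teradataml.dbutils.dbutils import _execute_query_and_generate_pandas_df

agg_query = df_utils._construct_describe_query(df, df._metaexpr,
                                               [0.25, 0.5, 0.75], "func")
# agg_query is a UNION ALL of one SELECT per statistic, as shown in RETURNS above.
pdf = _execute_query_and_generate_pandas_df(agg_query, index="func")
```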
1418
- @staticmethod
1419
- def _process_groupby_clause(table_name, groupby_column_list, is_time_series_aggregate, **kwargs):
1420
- """
1421
- Internal function used to process and generate GROUP BY or GROUP BY TIME clauses required for
1422
- query to be run for describe operation.
1423
-
1424
- PARAMETERS:
1425
- table_name:
1426
- Required Argument.
1427
- Specifies table name to be used for forming describe query.
1428
- Types: str
1429
-
1430
- groupby_column_list:
1431
- Required Argument.
1432
- Specifies list of column names involved in Group By.
1433
- Types: List of Strings.
1434
-
1435
- is_time_series_aggregate:
1436
- Required Argument.
1437
- Specifies a boolean stating whether GROUP BY clause to be formed is for
1438
- Time series aggregate or not.
1439
- Types: bool
1440
-
1441
- kwargs:
1442
- Optional Arguments.
1443
- Keyword argument for time series aggregate functions.
1444
-
1445
- RETURNS:
1446
- 1. Table Name appended with GROUP BY clause.
1447
- 2. Column projection string for GROUP BY columns.
1448
- 3. Group By Clause.
1449
-
1450
- RAISES:
1451
- None.
1452
-
1453
- EXAMPLES:
1454
- table_name, sel_groupby, groupby = DataFrameUtils()._process_groupby_clause(table_name, groupby_column_list,
1455
- is_time_series_aggregate, **kwargs)
1456
-
1457
- """
1458
- sel_groupby = ""
1459
- grp_by_clause = None
1460
-
1461
- if is_time_series_aggregate:
1462
- # For time series aggregates, timebucket_duration is mandatory, so it is always present in kwargs.
1463
- grp_by_clause = "GROUP BY TIME ({0}".format(kwargs['timebucket_duration'])
1464
-
1465
- # Add columns in value expression to GROUP BY TIME
1466
- if 'value_expression' in kwargs and \
1467
- kwargs['value_expression'] is not None and \
1468
- len(kwargs['value_expression']) > 0:
1469
- grp_by_clause = "{0} and {1}".format(grp_by_clause, ", ".join(kwargs['value_expression']))
1470
-
1471
- # Complete the parenthesis for GROUP BY TIME
1472
- grp_by_clause = "{0})".format(grp_by_clause)
1473
-
1474
- # Add Time code column information.
1475
- if 'timecode_column' in kwargs and \
1476
- kwargs['timecode_column'] is not None and \
1477
- len(kwargs['timecode_column']) > 0:
1478
- if 'sequence_column' in kwargs and \
1479
- kwargs['sequence_column'] is not None and \
1480
- len(kwargs['sequence_column']) > 0:
1481
- grp_by_clause = "{0} USING TIMECODE({1}, {2})".format(grp_by_clause, kwargs['timecode_column'],
1482
- kwargs['sequence_column'])
1483
- else:
1484
- grp_by_clause = "{0} USING TIMECODE({1})".format(grp_by_clause, kwargs['timecode_column'])
1485
-
1486
- # Add Fill information
1487
- if 'fill' in kwargs and kwargs['fill'] is not None and len(kwargs['fill']) > 0:
1488
- grp_by_clause = "{0} FILL({1})".format(grp_by_clause, kwargs['fill'])
1489
-
1490
- else:
1491
- if groupby_column_list is not None:
1492
- grp_by_clause = "GROUP BY {0}".format(",".join(groupby_column_list))
1493
-
1494
- if grp_by_clause is not None:
1495
- table_name = "{0} \n{1}".format(table_name, grp_by_clause)
1496
- tdp = preparer(td_dialect)
1497
- for g in groupby_column_list:
1498
- if is_time_series_aggregate:
1499
- if g == "TIMECODE_RANGE":
1500
- g = "$TD_TIMECODE_RANGE"
1501
-
1502
- if "GROUP BY TIME" in g:
1503
- g = "$TD_GROUP_BY_TIME"
1504
-
1505
- quoted_name = tdp.quote(g)
1506
- sel_groupby += "{0}, ".format(quoted_name)
1507
-
1508
- return table_name, sel_groupby, grp_by_clause
1509
-
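A sketch of the GROUP BY TIME branch, with assumed kwargs for a 10-minute bucket (values are illustrative, not verified output):

```python
# Hedged sketch: clause assembly for a time series describe.
kwargs = {"timebucket_duration": "MINUTES(10)",
          "value_expression": ["sensor_id"],
          "timecode_column": "ts",
          "fill": "PREV"}
tbl, sel_groupby, grp = DataFrameUtils()._process_groupby_clause(
    "mytab", ["sensor_id"], True, **kwargs)

# grp should resemble:
# GROUP BY TIME (MINUTES(10) and sensor_id) USING TIMECODE(ts) FILL(PREV)
```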
1510
- @staticmethod
1511
- def _get_column_names_and_types_from_metaexpr(metaexpr):
1512
- """
1513
- Internal function to return column names and respective types
1514
- given _metaexpr.
1515
-
1516
- PARAMETERS:
1517
- metaexpr:
1518
- Required Argument.
1519
- Dataframe's metaexpr. It is used to get column names and types.
1520
- Types: MetaExpression
1521
-
1522
- RETURNS:
1523
- Two lists - one for column names and another for column types
1524
-
1525
- RAISES:
1526
- None
1527
-
1528
- EXAMPLES:
1529
- dfUtils._get_column_names_and_types_from_metaexpr(
1530
- df._metaexpr)
1531
- """
1532
- # Constructing New Column names & Types for selected columns ONLY using Parent _metaexpr
1533
- col_names = []
1534
- col_types = []
1535
- for c in metaexpr.c:
1536
- col_names.append(c.name)
1537
- col_types.append(c.type)
1538
-
1539
- return col_names, col_types
1540
-
1541
- @staticmethod
1542
- def _insert_all_from_table(to_table_name, from_table_name, column_list, schema_name,
1543
- temporary=False):
1544
- """
1545
- Inserts all records from one table into the second, using columns ordered by column list.
1546
-
1547
- PARAMETERS:
1548
- to_table_name - String specifying name of the SQL Table to insert to.
1549
- from_table_name - String specifying name of the SQL Table to insert from.
1550
- column_list - List of strings specifying column names used in the insertion.
1551
- schema_name - Name of the database schema to insert table data into.
1552
- temporary - Specifies whether to create Vantage tables as permanent or volatile.
1553
- Default: False
1554
- Note: When True:
1555
- 1. volatile Tables are created, and
1556
- 2. schema_name is ignored.
1557
- When False, permanent tables are created.
1558
- RETURNS:
1559
- None
1560
-
1561
- RAISES:
1562
- Database error if an error occurred while executing the insert command.
1563
-
1564
- EXAMPLES:
1565
- df_utils._insert_all_from_table('table1_name', 'table2_name', ['col1', 'col2', 'col3'])
1566
- """
1567
- tdp = preparer(td_dialect)
1568
-
1569
- # Construct INSERT command.
1570
- column_order_string = ', '.join([tdp.quote("{0}".format(element)) for element in column_list])
1571
-
1572
- if schema_name:
1573
- full_to_table_name = tdp.quote(schema_name) + "." + tdp.quote(to_table_name)
1574
- elif temporary:
1575
- full_to_table_name = tdp.quote(to_table_name)
1576
- else:
1577
- full_to_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
1578
- to_table_name)
1579
-
1580
- insert_sql = SQLBundle._build_insert_from_table_query(full_to_table_name, from_table_name, column_order_string)
1581
-
1582
- # Execute INSERT command.
1583
- return UtilFuncs._execute_ddl_statement(insert_sql)
1584
-
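A sketch of appending staged rows into a target table; table and column names are illustrative:

```python
# Hedged sketch: INSERT ... SELECT with an explicit column order.
df_utils._insert_all_from_table("sales", "sales_stage",
                                ["accounts", "Feb", "Jan"], "PYUSER")
# Internally the statement is built by SQLBundle._build_insert_from_table_query().
```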
1585
- @staticmethod
1586
- def _dataframe_has_column(data, column):
1587
- """
1588
- Function to check whether column names in columns are present in given dataframe or not.
1589
- This function is used currently only for Analytics wrappers.
1590
-
1591
- PARAMETERS:
1592
- data - teradataml DataFrame to check against for column existence.
1593
- column - Column name (a string).
1594
-
1595
- RAISES:
1596
- None
1597
-
1598
- EXAMPLES:
1599
- DataFrameUtils._dataframe_has_column(data, col)
1600
- """
1601
- if column in [c.name for c in data._metaexpr.c]:
1602
- return True
1603
-
1604
- return False
1605
-
1606
- @staticmethod
1607
- def _get_row_count(table_name):
1608
- """
1609
- Function to return the row count of a teradataml Dataframe.
1610
- This function is used currently to determine the shape/size of a dataframe.
1611
-
1612
- PARAMETERS:
1613
- table_name - Name of the table to get the row count for.
1614
-
1615
- RAISES:
1616
- TeradataMlException (TDMLDF_INFO_ERROR)
1617
-
1618
- EXAMPLES:
1619
- DataFrameUtils._get_row_count(table_name)
1620
- """
1621
- # Construct COUNT(*) Query
1622
- try:
1623
- row_count_query = SQLBundle._build_nrows_print_query(table_name)
1624
- res = execute_sql(row_count_query)
1625
- return res.fetchone()[0]
1626
-
1627
- except TeradataMlException:
1628
- raise
1629
-
1630
- except Exception as err:
1631
- # TODO Better handle the level of information being presented to the user with logging
1632
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
1633
- MessageCodes.TDMLDF_INFO_ERROR) from err
1634
-
1635
- @staticmethod
1636
- def _get_scalar_value(table_name):
1637
- """
1638
- Function to return the only 1x1 (scalar) value from a teradataml DataFrame.
1639
-
1640
- PARAMETERS:
1641
- table_name - Name of the table to get the value from.
1642
-
1643
- RAISES:
1644
- TeradataMlException (TDMLDF_INFO_ERROR)
1645
-
1646
- EXAMPLES:
1647
- DataFrameUtils._get_scalar_value(table_name)
1648
- """
1649
- # Construct the base Query
1650
- try:
1651
- select_query = SQLBundle._build_base_query(table_name)
1652
- res = execute_sql(select_query)
1653
- return res.fetchone()[0]
1654
-
1655
- except TeradataMlException:
1656
- raise
1657
-
1658
- except Exception as err:
1659
- # TODO Better handle the level of information being presented to the user with logging
1660
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
1661
- MessageCodes.TDMLDF_INFO_ERROR) from err
1662
-
1663
- @staticmethod
1664
- def _get_sorted_nrow(df, n, sort_col, asc=True):
1665
- """
1666
- Internal Utility function that returns a teradataml DataFrame containing n rows
1667
- of the DataFrame. The DataFrame is sorted on the column specified in "sort_col",
1668
- typically the index column, or the first column if there is no index column.
1669
-
1670
- PARAMETERS:
1671
- df: teradataml DataFrame
1672
- n: Specifies the number of rows to select.
1673
- Type: int
1674
- sort_col: The column to sort on.
1675
- Type: str
1676
- asc: (optional) - Specifies sort order.
1677
- If True, sort in ascending order.
1678
- If False, sort in descending order.
1679
- The default value is True.
1680
- Type: boolean
1681
-
1682
- RETURNS:
1683
- teradataml DataFrame
1684
-
1685
- EXAMPLES:
1686
- DataFrameUtils._get_sorted_nrow(df, 10, "col1")
1687
- DataFrameUtils._get_sorted_nrow(df, 20, "col1", asc=True)
1688
- DataFrameUtils._get_sorted_nrow(df, 30, "col1", asc=False)
1689
-
1690
- """
1691
- #TODO: implement and use this in teradatasqlalchemy
1692
- tdp = preparer(td_dialect)
1693
- aed_utils = AedUtils()
1694
-
1695
- sort_order = "asc"
1696
- if not asc:
1697
- sort_order = "desc"
1698
-
1699
- quoted_cols = [tdp.quote(c) for c in df.columns]
1700
- sel_cols_str = ",".join(quoted_cols)
1701
- sel_row_num = "row_number() over (order by \"{0}\" {1}) - 1 as tdml_row_num, {2}".format(sort_col, sort_order, sel_cols_str)
1702
- filter_str = "tdml_row_num < {0}".format(n)
1703
- sel_nodeid = aed_utils._aed_select(df._nodeid, sel_row_num)
1704
- fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
1705
- sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
1706
- col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
1707
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types))
1708
- # Call the function from_node from appropriate class either DataFrame or GeoDataFrame
1709
- new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
1710
- new_df._orderby = df._orderby
1711
- new_df._metaexpr._n_rows = n
1712
- return new_df
1713
-
1714
- @staticmethod
1715
- def _get_database_names(connection, schema_name):
1716
- """
1717
- Function to return a list of valid database names for a given sqlalchemy connection.
1718
- This function is used to determine whether the database used is valid in user APIs such as copy_to_sql.
1719
-
1720
- PARAMETERS:
1721
- connection: Required Argument.
1722
- A SQLAlchemy connection object.
1723
-
1724
- schema_name: Required Argument
1725
- String specifying the requested schema name.
1726
-
1727
- RAISES:
1728
- TeradataMlException (TDMLDF_INFO_ERROR)
1729
-
1730
- EXAMPLES:
1731
- DataFrameUtils._get_database_names(get_connection(), schema_name)
1732
- """
1733
- #TODO: implement and use this in teradatasqlalchemy
1734
- table_obj = table('databasesV', column('databasename'), schema='dbc')
1735
- stmt = select(text(str(func.lower(table_obj.c.databasename)) + ' as databasename')).where(
1736
- text('databasename (NOT CASESPECIFIC) = {} (NOT CASESPECIFIC)'.format(':schema_name')))
1737
- stmt = text(str(stmt))
1738
- stmt = stmt.bindparams(schema_name=schema_name)
1739
- res = connection.execute(stmt).fetchall()
1740
- return [name.databasename for name in res]
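A sketch of the schema check a caller such as copy_to_sql might perform; note the helper lower-cases the names it returns:

```python
# Hedged sketch: verify the target schema exists before writing to it.
from teradataml.context.context import get_connection

names = df_utils._get_database_names(get_connection(), "PYUSER")
if "pyuser" not in names:
    raise ValueError("Schema 'PYUSER' not found on this system.")
```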
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+
4
+ Unpublished work.
5
+ Copyright (c) 2018 by Teradata Corporation. All rights reserved.
6
+ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
7
+
8
+ Primary Owner: mark.sandan@teradata.com
9
+ Secondary Owner:
10
+
11
+ This file implements util functions of data frame.
12
+ """
13
+
14
+ import numbers
15
+ import pandas as pd
16
+ from collections import OrderedDict
17
+
18
+ from teradataml.common.utils import UtilFuncs
19
+ from teradataml.common.aed_utils import AedUtils
20
+ from teradataml.common.constants import AEDConstants, PTITableConstants, \
21
+ SQLPattern, PythonTypes
22
+ from teradataml.common.sqlbundle import SQLBundle
23
+ from teradataml.common.exceptions import TeradataMlException
24
+ from teradataml.common.messages import Messages
25
+ from teradataml.common.messagecodes import MessageCodes
26
+
27
+ from teradataml.context.context import get_context, get_connection
28
+ from teradataml.context.context import _get_current_databasename
29
+ from teradataml.dbutils.dbutils import _execute_query_and_generate_pandas_df
30
+
31
+ from teradataml.options.display import display
32
+ from teradataml.options.configure import configure
33
+ from teradataml.utils.utils import execute_sql
34
+
35
+ from teradatasqlalchemy.types import FLOAT, NUMBER, DECIMAL, PERIOD_TIMESTAMP
36
+ from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
37
+ import teradataml.dataframe as tdmldf
38
+
39
+ from sqlalchemy.sql import select
40
+ from sqlalchemy.sql.expression import text
41
+ from sqlalchemy import table, column, func
42
+ from datetime import datetime, date, time
43
+ from decimal import Decimal
44
+
45
+ # TODO - Need to write unit testcases for these functions
46
+ class DataFrameUtils():
47
+
48
+ @staticmethod
49
+ def _execute_node_return_db_object_name(nodeid, metaexpression = None):
50
+ """
51
+ Fetches queries and view names from AED node and creates views from queries
52
+ Additionally inspects the metaexpression for consistency
53
+
54
+ PARAMETERS:
55
+ nodeid: nodeid to execute
56
+ metaexpression: (optional) updated _metaexpr to validate
57
+
58
+ EXAMPLES:
59
+ _execute_node_return_db_object_name(nodeid)
60
+ _execute_node_return_db_object_name(nodeid, metaexpr)
61
+
62
+ RETURNS:
63
+ Top level view name.
64
+
65
+ """
66
+ aed_obj = AedUtils()
67
+ if not aed_obj._aed_is_node_executed(nodeid):
68
+
69
+ view_query_node_type_list = aed_obj._aed_get_exec_query(nodeid)
70
+ view_names, queries, node_query_types, node_ids = view_query_node_type_list
71
+
72
+ # Executing Nodes / Creating Views
73
+ for index in range(len(queries) - 1, -1, -1):
74
+ is_persist = False
75
+ if metaexpression and metaexpression._is_persist:
76
+ is_persist = True
77
+
78
+ try:
79
+ if node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_ML_QUERY_MULTI_OUTPUT.value or\
80
+ ("OUT TABLE " in queries[index] and SQLPattern.SQLMR.value.match(queries[index])) or \
81
+ is_persist:
82
+ # TODO:: OR condition in above needs to be removed once AED support is added.
83
+ UtilFuncs._create_table(view_names[index], queries[index])
84
+
85
+ elif node_query_types[index] in ['groupby', 'groupbytime']:
86
+ # If query_type is either groupby or groupbytime get it's parent
87
+ # nodeid and execute queries for the same
88
+ parent_nodeid = aed_obj._aed_get_parent_nodeids(nodeid)[0]
89
+ DataFrameUtils._execute_node_return_db_object_name(parent_nodeid)
90
+
91
+ elif node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_REFERENCE.value:
92
+ # Reference nodes - To be ignored.
93
+ pass
94
+
95
+ else:
96
+ UtilFuncs._create_view(view_names[index], queries[index])
97
+
98
+ # Updating Node Status for executed Node
99
+ aed_obj._aed_update_node_state_single(node_ids[index], AEDConstants.AED_NODE_EXECUTED.value)
100
+
101
+ except Exception as emsg:
102
+ # TODO:: Append node execution details to emsg.
103
+ # Node description, such as nodeType or node operation, should be added
104
+ # here in 'emsg' to give away more information, where exactly
105
+ # node execution failed.
106
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(emsg)),
107
+ MessageCodes.TDMLDF_EXEC_SQL_FAILED)
108
+
109
+ # Setting New Table name retrieved to TDML DF
110
+ result_table_view_name = aed_obj._aed_get_tablename(nodeid)
111
+ # validate the metaexpression
112
+ if configure._validate_metaexpression:
113
+ DataFrameUtils._validate_metaexpression(result_table_view_name, metaexpression)
114
+
115
+ return result_table_view_name
116
+
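A sketch of forcing execution of a DataFrame's lazy AED node, assuming the internal `_nodeid` attribute used throughout this module:

```python
# Hedged sketch: materialize the node and get the backing view/table name.
view_name = DataFrameUtils._execute_node_return_db_object_name(
    df._nodeid, df._metaexpr)
```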
117
+ @staticmethod
118
+ def _validate_metaexpression(result_table_view_name, metaexpression):
119
+ """
120
+ Inspects the metaexpression for consistency with the underlying table/view
121
+
122
+ PARAMETERS:
123
+ result_table_view_name: a string representing the table/view name to check column metadata
124
+ metaexpression: the metaexpr of the DataFrame to compare against the result_table_view_name
125
+
126
+ EXAMPLES:
127
+ _validate_metaexpression('t1', df._metaexpr)
128
+
129
+
130
+ RETURNS:
131
+ None
132
+ Raises a RuntimeError if mismatches are found
133
+
134
+ """
135
+ # metaexpression should have already been updated
136
+ if metaexpression is not None:
137
+
138
+ name = lambda x: x[0]
139
+ type_ = lambda x: x[1]
140
+
141
+ # compare sorted by name of column
142
+ df = sorted(UtilFuncs._describe_column(DataFrameUtils._get_metadata_from_table(result_table_view_name)), key = lambda x: x[0])
143
+ meta = sorted(metaexpression.c, key = lambda x: x.name)
144
+
145
+ # check length
146
+ if len(df) == len(meta):
147
+ for i in range(len(df)):
148
+
149
+ # map Teradata type to python type
150
+ meta_type = UtilFuncs._teradata_type_to_python_type(meta[i].type)
151
+
152
+ # compare column names and types
153
+ if meta[i].name != name(df[i]) or meta_type != type_(df[i]):
154
+ err_msg = "[Mismatch when checking %s]\n\t[Table/View] %s %s\n\t[MetaExpression] %s %s (mapped from => %s)\n"
155
+ raise RuntimeError(err_msg % (result_table_view_name,
156
+ name(df[i]), type_(df[i]),
157
+ meta[i].name, meta_type, meta[i].type))
158
+ else:
159
+ err_msg = "[Length mismatch when checking %s]\nSource Table/View has length %s but MetaExpression has length %s"
160
+ raise RuntimeError(err_msg % (result_table_view_name, len(df), len(meta)))
161
+
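Per the check in _execute_node_return_db_object_name(), this validation only runs when an internal option is set; a sketch of opting in:

```python
# Hedged sketch: enable metaexpression validation (internal configure flag).
from teradataml.options.configure import configure
configure._validate_metaexpression = True  # mismatches now raise RuntimeError
```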
162
+ @staticmethod
163
+ def _get_dataframe_print_string(table_name, index_label, orderby=None, undropped_index=None):
164
+ """
165
+ Builds string output for teradataml DataFrame
166
+
167
+ PARAMETERS:
168
+ table_name - Name of the database table to read from.
169
+ index_label - String/List specifying column to use as index.
170
+ orderby - order expression to sort returned rows
171
+
172
+ EXAMPLES:
173
+ _get_dataframe_print_string('table_name', None, None)
174
+
175
+ RETURNS:
176
+ String representation of a pandas DataFrame.
177
+
178
+ """
179
+ read_query = SQLBundle._build_top_n_print_query(table_name, display.max_rows, orderby)
180
+
181
+ if index_label is not None:
182
+ pandas_df = _execute_query_and_generate_pandas_df(read_query, index=index_label)
183
+ else:
184
+ pandas_df = _execute_query_and_generate_pandas_df(read_query)
185
+
186
+ return pandas_df.to_string()
187
+
188
+ @staticmethod
189
+ def _get_pprint_dtypes(column_names_and_types, null_count=False):
190
+ """
191
+ Returns a string containing the column names and types.
192
+ If null_count is True, the string will also contain
193
+ the number of non-null values for each column.
194
+
195
+ PARAMETERS:
196
+ column_names_and_types - List of column names and types.
197
+ null_count(optional) - List of the non-null count for each column.
198
+
199
+ EXAMPLES:
200
+ >>> print(_get_pprint_dtypes(column_names_and_types))
201
+ accounts str
202
+ Feb float
203
+ Jan int
204
+ Mar int
205
+ Apr int
206
+ datetime str
207
+
208
+ >>> print(_get_pprint_dtypes(column_names_and_types, null_count))
209
+ accounts 3 non-null str
210
+ Feb 3 non-null float
211
+ Jan 3 non-null int
212
+ Mar 3 non-null int
213
+ Apr 3 non-null int
214
+ datetime 3 non-null str
215
+
216
+ RAISES:
217
+
218
+ """
219
+
220
+ col_names = [i[0] for i in column_names_and_types]
221
+ col_types = [i[1] for i in column_names_and_types]
222
+ max_col_names = len(max(col_names, key=len)) + 4
223
+ max_col_types = len(max(col_types, key=len))
224
+ dtypes_string = ""
225
+ if not null_count:
226
+ for colname, coltype in column_names_and_types:
227
+ dtypes_string += "{0: <{name_width}}{1: >{type_width}}\n".format(colname, coltype,
228
+ name_width=max_col_names,
229
+ type_width=max_col_types)
230
+ else:
231
+ null_count = [i[2] for i in column_names_and_types]
232
+ max_null_count = len(str(max(null_count, key=len)))
233
+ for colname, coltype, num_nulls in column_names_and_types:
234
+ dtypes_string += "{0: <{name_width}}{1: <{count_width}} non-null {2: <{type_width}}\n".format(colname,
235
+ num_nulls,
236
+ coltype,
237
+ name_width=max_col_names,
238
+ count_width=max_null_count,
239
+ type_width=max_col_types)
240
+ # Remove last new line character.
241
+ dtypes_string = dtypes_string[:-1]
242
+ return dtypes_string
243
+
244
+ @staticmethod
245
+ def _get_metadata_from_table(table_name):
246
+ """
247
+ Retrieves column metadata by executing a HELP COLUMN command.
248
+
249
+ PARAMETERS:
250
+ table_name - The table name or view name.
251
+
252
+ RETURNS:
253
+ returns the result set (column information) from HELP COLUMN.
254
+
255
+ RAISES:
256
+ Database error if an error occurred while executing the HELP COLUMN.
257
+
258
+ EXAMPLES:
259
+ df = DataFrame.from_table('mytab')
260
+ metadata = _get_metadata_from_table(df._table_name)
261
+ """
262
+ # Construct HELP COLUMN command.
263
+ help_col_sql = SQLBundle._build_help_column(table_name)
264
+ # Execute HELP COLUMN command.
265
+ return UtilFuncs._execute_query(help_col_sql)
266
+
267
+ @staticmethod
268
+ def _extract_select_string(select_expression):
269
+ """
270
+ Takes in a string/list representing a Pandas selection clause of any of the forms (only):
271
+ a) "col1" or 'col1'
272
+ b) ["col 1"] or ['col 1']
273
+ c) ["col1", "col2", "col3"] or ['col1', 'col2', 'col3']
274
+ d) [['col1', 'col2', 'col3']] or [["col1", "col2", "col3"]]
275
+
276
+ And returns a list with column strings representing the selection of the form:
277
+ a) ['col1']
278
+ b) ['col 1']
279
+ c) ['col1','col2','col3']
280
+ d) ['col1','col2','col3']
281
+
282
+ Column Names ("col1", "col2"..) are Strings representing database table Columns.
283
+ All standard Teradata column data types are supported, e.g., INTEGER, VARCHAR(5), FLOAT.
284
+
285
+ PARAMETERS:
286
+ select_expression - Expression representing column selection
287
+ Type - String or List of Strings or List of List (Single level only)
288
+ Required - Yes
289
+
290
+ EXAMPLES:
291
+ DataFrameUtils._extract_select_string([['col1', 'col2']])
292
+ DataFrameUtils._extract_select_string("col1")
293
+ DataFrameUtils._extract_select_string(["col1"])
294
+ DataFrameUtils._extract_select_string(["col1","col2","col3"])
295
+
296
+ RETURNS:
297
+ List of Strings representing column names.
298
+
299
+ RAISES:
300
+ TeradataMlException
301
+ """
302
+ tdp = preparer(td_dialect)
303
+ column_list = []
304
+
305
+ # Single String column
306
+ if isinstance(select_expression, str):
307
+ # Error handling - Empty String
308
+ if select_expression == "":
309
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
310
+ MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
311
+ else:
312
+ column_list.append(tdp.quote("{0}".format(select_expression.strip())))
313
+
314
+ # Error Handling - [], [""], [None], ["None"], ['col1', None], ['col1', '']
315
+ elif isinstance(select_expression, list) and (len(select_expression) == 0 or
316
+ any(element in [None, "None", ""] for element in select_expression)):
317
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
318
+ MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
319
+
320
+ # List - ["col1"] or ["col1", "col2", "col3"]
321
+ elif isinstance(select_expression, list) and all(isinstance(element, str) for element in select_expression):
322
+ if len(select_expression) == 1:
323
+ column_list.append(tdp.quote("{0}".format(select_expression[0].strip())))
324
+ else:
325
+ column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression]
326
+
327
+ # List of List (Single level only - Pandas Syntax) - [["col1", "col2", "col3"]]
328
+ elif isinstance(select_expression, list) and isinstance(select_expression[0], list):
329
+ # Error Handling - [[]], [[""]], [[None]], [['col1', None]], [['col1', "None"]], ["col1", ""]
330
+ if len(select_expression[0]) == 0 or any(element in [None, "None", ""] for element in select_expression[0]):
331
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
332
+ MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
333
+
334
+ else:
335
+ column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression[0]]
336
+
337
+ # Any other Format - Raise Format Exception
338
+ else:
339
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_FORMAT),
340
+ MessageCodes.TDMLDF_SELECT_INVALID_FORMAT)
341
+ return column_list
342
+
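A sketch of how the accepted shapes normalize; quoting is applied by the Teradata dialect preparer, so plain lower-case names may come back unquoted:

```python
# Hedged sketch: each accepted form flattens to one list of column names.
DataFrameUtils._extract_select_string("col1")              # roughly ['col1']
DataFrameUtils._extract_select_string(["col 1"])           # roughly ['"col 1"'] (space forces quoting)
DataFrameUtils._extract_select_string([["col1", "col2"]])  # roughly ['col1', 'col2']
```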
343
+ @staticmethod
344
+ def _get_primary_index_from_table(table_name):
345
+ """
346
+ Retrieves the primary index by executing a HELP INDEX command.
347
+ PARAMETERS:
348
+ table_name - The table name or volatile table name.
349
+ RETURNS:
350
+ Returns a list containing the primary index columns from HELP INDEX.
351
+ If there is no primary index (NoPI table), then returns None.
352
+ RAISES:
353
+ Database error if an error occurred while executing the HELP INDEX.
354
+ EXAMPLES:
355
+ df = DataFrame.from_table('mytab')
356
+ index_labels = DataFrameUtils._get_primary_index_from_table(df._table_name)
357
+ """
358
+ # Construct HELP INDEX command.
359
+ help_index_sql = SQLBundle._build_help_index(table_name)
360
+
361
+ # Execute HELP INDEX command.
362
+ rows = UtilFuncs._execute_query(help_index_sql)
363
+ index_labels = []
364
+ for row in rows:
365
+ # row[1] specifies whether the Index is 'Primary or Secondary?'
366
+ if row[1].rstrip() == 'P':
367
+ # row[2] specifies a string of comma separated column names that form the primary index
368
+ if "," in row[2]:
369
+ index_cols = row[2].split(',')
370
+ else:
371
+ index_cols = [row[2]]
372
+ for index_col in index_cols:
373
+ # Since TD_TIMEBUCKET column in PTI tables is not functionally available, it can be ignored
374
+ # from the index information as well (else a warning is generated by SQLAlchemy).
375
+ # row[12] corresponds to 'Timebucket' column in the results of 'help index' SQL command, which
376
+ # is available only when the version supports PTI tables.
377
+ if index_col == PTITableConstants.TD_TIMEBUCKET.value and len(row) > 11 and row[12] is not None:
378
+ continue
379
+ else:
380
+ index_labels.append(index_col)
381
+
382
+ if len(index_labels) > 0:
383
+ return index_labels
384
+ else:
385
+ return None
386
+
387
+ @staticmethod
388
+ def __validate_sort_type_raise_exception(sort_col_type):
389
+ """
390
+ Function to raise a TeradataMlException for errors encountered due to invalid/incorrect
391
+ "sort_col_type" in the "_validate_sort_col_type" function.
392
+
393
+ PARAMETERS:
394
+ sort_col_type: The sort column type.
395
+
396
+ RETURNS:
397
+ None
398
+
399
+ RAISES:
400
+ TeradataMlException
401
+
402
+ EXAMPLES:
403
+ df_utils.__validate_sort_type_raise_exception(PythonTypes.PY_STRING_TYPE.value)
404
+ """
405
+ msg = Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE).format(sort_col_type)
406
+ raise TeradataMlException(msg, MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE)
407
+
408
+ @staticmethod
409
+ def _validate_sort_col_type(sort_col_type, sort_col_values):
410
+ """
411
+ Validates a list of sort column values with the sort column type.
412
+
413
+ PARAMETERS:
414
+ sort_col_type - The sort column type.
415
+ sort_col_values - A single value or list-like values
416
+
417
+ RETURNS:
418
+ None
419
+
420
+ RAISES:
421
+ TeradataMlException
422
+
423
+ EXAMPLES:
424
+ df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, ["Jan", "Feb"])
425
+ df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, "Jan")
426
+ df_utils._validate_sort_col_type(PythonTypes.PY_INT_TYPE.value, [1, 2])
427
+ """
428
+ if isinstance(sort_col_values, list):
429
+ if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
430
+ if not all(isinstance(i, str) for i in sort_col_values):
431
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
432
+ elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
433
+ if not all(isinstance(i, float) for i in sort_col_values):
434
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
435
+ elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
436
+ if not all(isinstance(i, Decimal) for i in sort_col_values):
437
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
438
+ elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
439
+ if not all(isinstance(i, datetime) for i in sort_col_values):
440
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
441
+ elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
442
+ if not all(isinstance(i, time) for i in sort_col_values):
443
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
444
+ elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
445
+ if not all(isinstance(i, date) for i in sort_col_values):
446
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
447
+ elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
448
+ if not all(isinstance(i, bytes) for i in sort_col_values):
449
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
450
+ else: # numeric type
451
+ if not all(isinstance(i, numbers.Integral) for i in sort_col_values):
452
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
453
+ elif isinstance(sort_col_values, (tuple, dict)):
454
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
455
+ MessageCodes.TDMLDF_DROP_ARGS)
456
+ else:
457
+ if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
458
+ if not isinstance(sort_col_values, str):
459
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
460
+ elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
461
+ if not isinstance(sort_col_values, float):
462
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
463
+ elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
464
+ if not isinstance(sort_col_values, Decimal):
465
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
466
+ elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
467
+ if not isinstance(sort_col_values, datetime):
468
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
469
+ elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
470
+ if not isinstance(sort_col_values, time):
471
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
472
+ elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
473
+ if not isinstance(sort_col_values, date):
474
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
475
+ elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
476
+ if not isinstance(sort_col_values, bytes):
477
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
478
+ else: # numeric type
479
+ if not isinstance(sort_col_values, numbers.Integral):
480
+ DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
481
+
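A sketch of the contract: values must match the Python type mapped from the sort column, otherwise a TeradataMlException is raised:

```python
# Hedged sketch: type-checking sort values against the sort column's type.
from teradataml.common.constants import PythonTypes

DataFrameUtils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, ["Jan", "Feb"])  # passes
DataFrameUtils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, [1, 2])          # raises
```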
482
+ @staticmethod
+ def _get_required_columns_types_from_metaexpr(metaexpr, col_list=None):
483
+ """
484
+ Retrieves column names and types from meta expression. If you want to get types for only some columns,
485
+ pass those columns to 'col_list' argument.
486
+
487
+ PARAMETERS:
488
+ metaexpr - Meta expression from which columns and types to be retrieved.
489
+ col_list - Column list for which you want to get types
490
+
491
+ RETURNS:
492
+ Dictionary: key as column name and datatype as value.
493
+
494
+ EXAMPLES:
495
+ df = DataFrame.from_table('mytab')
496
+ metadata = DataFrameUtils._get_required_columns_types_from_metaexpr(df._metaexpr)
497
+ """
498
+
499
+ if isinstance(col_list, str):
500
+ col_list = [col_list]
501
+
502
+ if col_list is not None and not isinstance(col_list, list):
503
+ return None
504
+
505
+ meta_cols = metaexpr.t.c
506
+ meta_columns = [c.name for c in meta_cols]
507
+ col_names = []
508
+ col_types = []
509
+
510
+ # When column list to retrieve is not provided, return meta-data for all columns.
511
+ if col_list is None:
512
+ for col_name in meta_columns:
513
+ col_names.append(meta_cols[col_name].name)
514
+ col_types.append(meta_cols[col_name].type)
515
+
516
+ # Return meta-data for only requested columns otherwise.
517
+ else:
518
+ for col_name in col_list:
519
+ if DataFrameUtils._check_column_exists(col_name, meta_columns):
520
+ # _metaexpr saves columns without quotes, so unquoting.
521
+ unquoted_col_name = col_name.replace('"', "")
522
+ col_names.append(meta_cols[unquoted_col_name].name)
523
+ col_types.append(meta_cols[unquoted_col_name].type)
524
+
525
+ return OrderedDict(zip(col_names, col_types))
526
+
527
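+    # Illustrative result, assuming a hypothetical table 'mytab' with columns
+    # col1 INTEGER and col2 VARCHAR:
+    #   DataFrameUtils._get_required_columns_types_from_metaexpr(df._metaexpr)
+    #   returns OrderedDict([('col1', INTEGER()), ('col2', VARCHAR())])
+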
+    @staticmethod
+    def _check_column_exists(column_name, df_columns):
+        """
+        Checks whether the provided column is present in the list of columns or not.
+        Note:
+            It is the calling function's responsibility to send the column and columns
+            list in the proper case. By default the lookup is case-sensitive. For a
+            case-insensitive lookup, send the column_name and df_columns list in lower case.
+
+        PARAMETERS:
+            column_name - Column name which needs to be checked.
+            df_columns - List of columns in which the column is to be checked.
+
+        RETURNS:
+            True if the column exists, otherwise False.
+
+        EXAMPLES:
+            df = DataFrame.from_table('mytab')
+            exists = DataFrameUtils._check_column_exists("col1", df.columns)
+        """
+        unquoted_df_columns = [column.replace('"', "") for column in df_columns]
+        return column_name.replace('"', "") in unquoted_df_columns
+
+    @staticmethod
+    def _validate_agg_function(func, col_names):
+        """
+        Internal function to validate column names against actual
+        column names passed as parameter and aggregate operations
+        against valid aggregate operations.
+
+        PARAMETERS:
+            func - (Required) Specifies the function(s) to be
+                applied on teradataml DataFrame columns.
+                Acceptable formats for function(s) are string,
+                dictionary or list of strings/functions.
+                Accepted combinations are:
+                1. String function name
+                2. List of string functions
+                3. Dictionary of column names -> string function
+                   (or list of string functions)
+            col_names - List. Names of the columns in the DataFrame.
+
+        RETURNS:
+            operations - dict of columns -> list of aggregate operations.
+                A unified dictionary, similar to the dictionary form of func,
+                even when func is a string or a list of strings/functions.
+
+        RAISES:
+            1. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
+               operation(s) received in parameter 'func' is/are
+               invalid.
+
+               Possible Value :
+               Invalid aggregate operation(s): minimum, counter.
+               Valid aggregate operation(s): count, max, mean, min,
+               std, sum.
+
+            2. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
+               specified in 'func' is not present in the dataframe.
+
+               Possible Value :
+               Invalid column(s) given in parameter func: col1.
+               Valid column(s) : A, B, C, D.
+
+        EXAMPLES:
+            Let the dataframe contain 2 columns, col1 and col2.
+
+            VALID EXAMPLES:
+            1. operations = DataFrameUtils._validate_agg_function(
+                   'mean', ['col1', 'col2'])
+
+            2. operations = DataFrameUtils._validate_agg_function(
+                   ['mean', 'min'], ['col1', 'col2'])
+
+            3. operations = DataFrameUtils._validate_agg_function(
+                   {'col1' : ['mean', 'min'], 'col2' : 'count'},
+                   ['col1', 'col2'])
+
+            INVALID EXAMPLES:
+            1. operations = DataFrameUtils._validate_agg_function(
+                   'counter', ['col1', 'col2'])
+
+            2. operations = DataFrameUtils._validate_agg_function(
+                   {'col1' : ['mean', 'min'], 'col55' : 'count'},
+                   ['col1', 'col2'])
+        """
+        operations = OrderedDict()
+
+        valid_aggregate_operations = UtilFuncs._get_valid_aggregate_operations()
+
+        if isinstance(func, str):
+            for column in col_names:
+                operations[column] = [func]
+        elif isinstance(func, list):
+            for column in col_names:
+                operations[column] = func
+        else:
+            for column in func:
+                if isinstance(func[column], str):
+                    func[column] = [func[column]]  # Converts string inside dict to list
+            operations = func
+
+        given_columns = operations.keys()
+        invalid_columns = []
+        all_operations = []
+        for col in given_columns:
+            all_operations.extend(operations[col])
+            if col not in col_names:
+                invalid_columns.append(col)
+        if len(invalid_columns) > 0:  # If any of the columns specified is not present in dataframe
+            col_names.sort()
+            invalid_columns.sort()
+            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN). \
+                format(", ".join(invalid_columns), 'func', ", ".join(col_names))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN)
+
+        all_operations = list(set(all_operations))
+        invalid_aggregates = []
+        for operation in all_operations:
+            if operation not in valid_aggregate_operations \
+                    and operation not in UtilFuncs._get_valid_time_series_aggregate_operations():
+                invalid_aggregates.append(operation)
+        if len(invalid_aggregates) > 0:  # If any of the aggregate operations specified is not valid
+            # To raise the error message, add the other time series aggregate operations that can
+            # be used with the DataFrame.agg() method.
+            valid_aggregate_operations = valid_aggregate_operations + ['first', 'last', 'mode']
+            valid_aggregate_operations.sort()
+            invalid_aggregates.sort()
+            msg = Messages.get_message(MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION). \
+                format(", ".join(invalid_aggregates), ", ".join(valid_aggregate_operations))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION)
+
+        return operations
+
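+    # Illustrative normalization performed above (assuming columns col1 and col2):
+    #   _validate_agg_function('min', ['col1', 'col2'])
+    #       -> OrderedDict([('col1', ['min']), ('col2', ['min'])])
+    #   _validate_agg_function({'col1': 'count'}, ['col1', 'col2'])
+    #       -> {'col1': ['count']}
+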
+    @staticmethod
+    def _generate_aggregate_column_expression(df, column, operation, describe_op, tdp, **kwargs):
+        """
+        Function generates the aggregate column expression for the provided column
+        and aggregate function.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame which is to be used to get the
+                desired aggregate column expression.
+                Types: teradataml DataFrame
+
+            column:
+                Required Argument.
+                Specifies the column name for which the desired aggregate operation
+                is to be used.
+                Types: str
+
+            operation:
+                Required Argument.
+                Specifies the aggregate operation.
+                Types: str
+
+            describe_op:
+                Required Argument.
+                Specifies a boolean flag that decides whether the aggregate
+                operation is being performed for DataFrame.describe() or not.
+                Types: bool
+
+            tdp:
+                Required Argument.
+                Specifies a TeradataIdentifierPreparer object. It is required for
+                quoting.
+                Types: TeradataIdentifierPreparer
+
+            kwargs:
+                Specifies miscellaneous keyword arguments that can be passed to
+                aggregate functions.
+
+        RAISES:
+            AttributeError - In case ColumnExpression does not have the desired
+            aggregate function implemented.
+
+        RETURNS:
+            A boolean stating whether the column is supported or not, new column name,
+            new column type, a string representing the column aggregate expression, and
+            invalid column information in case the column has an unsupported type for an
+            aggregate operation.
+
+        EXAMPLES:
+            column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
+                DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
+                                                                     describe_op=describe_op, percentile=percentile,
+                                                                     tdp=tdp, **kwargs)
+        """
+        try:
+            key_to_process = ""
+            # Quote column names that match Teradata reserved keywords.
+            if "sort_columns" in kwargs:
+                key_to_process = "sort_columns"
+            elif "sort_column" in kwargs:
+                key_to_process = "sort_column"
+
+            if key_to_process:
+                quoted_columns = UtilFuncs._process_for_teradata_keyword(kwargs[key_to_process])
+                kwargs[key_to_process] = quoted_columns
+
+            func_expression = getattr(df[column], operation)(describe_op=describe_op, **kwargs)
+            new_column_name = column if describe_op else "{1}_{0}".format(column, operation)
+            # column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str
+            return True, new_column_name, NUMBER() if describe_op else func_expression.type, \
+                func_expression.compile_label(new_column_name), None
+        except AttributeError:
+            # We are here means the provided operation is invalid and is not supported.
+            # This is for internal purposes only.
+            # Validation of operations for "agg" should be done in "agg" only.
+            raise RuntimeError("Invalid aggregate function: {}".format(operation))
+        except RuntimeError:
+            # We are here means the column does not support the provided operation.
+            # We will ignore this and add the column to the invalid column list.
+            # invalid_columns[operation].append("({0} - {1})".format(column, column_type)) OR
+            # We will raise a generic message, mentioning the DF does not have any column with
+            # a type supported to perform the operation.
+            if describe_op:
+                return True, tdp.quote(column), NUMBER(), 'null as {}'.format(tdp.quote(column)), None
+            else:
+                return False, None, None, None, "({0} - {1})".format(column, df[column].type)
+        except Exception:
+            raise
+
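+    # Illustrative return values (assuming a column 'col1' of type INTEGER,
+    # operation 'min' and describe_op=False):
+    #   supported   -> (True, 'min_col1', INTEGER(), <'min' expression labeled min_col1>, None)
+    #   unsupported -> (False, None, None, None, '(col1 - INTEGER)')
+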
+    @staticmethod
+    def _construct_sql_expression_for_aggregations(df, column_names, column_types, func, percentile=.5,
+                                                   describe_op=False, **kwargs):
+        """
+        Internal function to create and return the sql expression
+        corresponding to the given operation, column_names and
+        column_types.
+
+        Column_types are used to check whether all the datatypes are
+        valid types for the given operation, and an exception is thrown
+        if they are not.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame which is to be used to get the
+                desired aggregate column expression.
+                Types: teradataml DataFrame
+
+            column_names:
+                Required Argument.
+                Specifies the column names for which the desired aggregate operation
+                is to be executed.
+                Types: List of strings
+
+            column_types:
+                Required Argument.
+                Specifies the respective column types for the column names.
+                Types: List of teradatasqlalchemy types
+
+            func:
+                Required Argument.
+                Specifies the aggregate function(s) to be applied on teradataml
+                DataFrame columns.
+                Types: string, dictionary or list of strings/functions.
+                Accepted combinations are:
+                1. String function name
+                2. List of functions
+                3. Dictionary containing column name as key and aggregate
+                   function name (string or list of strings) as value
+
+            percentile:
+                Optional Argument.
+                Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
+                The default is .5, which returns the 50th percentile.
+                Types: float
+
+            describe_op:
+                Optional Argument.
+                Specifies a boolean flag that decides whether the aggregate operation being
+                performed is for DataFrame.describe() or not.
+                Types: bool
+
+            kwargs:
+                Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
+
+        RETURNS:
+            a) sql expression such as
+               1. 'min(col1) as min_col1, min(col2) as min_col2' if
+                  col1 and col2 are the columns in the DataFrame and the
+                  operation is 'min'
+               2. 'max(col1) as max_col1, max(col2) as max_col2' if
+                  col1 and col2 are the columns in the DataFrame and the
+                  operation is 'max'
+               3. 'min(col1) as min_col1, stddev_samp(col2) as
+                  std_col2' if col1, col2 are the columns in the
+                  DataFrame and the operations are min, std.
+               etc...
+            b) new columns' names (e.g. min_col1, min_col2 ...)
+            c) new columns' types
+        RAISES:
+            TeradataMLException
+            1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
+               aggregate operations do not support the specified columns.
+
+               Possible Value :
+               No results. Below is/are the error message(s):
+               All selected columns [(col1 - VARCHAR)] is/are
+               unsupported for 'sum' operation.
+
+            2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
+               operation(s) received in parameter 'func' is/are
+               invalid.
+
+               Possible Value :
+               Invalid aggregate operation(s): minimum, counter.
+               Valid aggregate operation(s): count, max, mean, min,
+               std, sum.
+
+            3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
+               specified in func is not present in the dataframe.
+
+               Possible Value :
+               Invalid column(s) given in parameter func: col1.
+               Valid column(s) : A, B, C, D.
+
+        EXAMPLES:
+            col_names, col_types = \
+                df_utils._get_column_names_and_types_from_metaexpr(
+                    self._metaexpr)
+            expr, new_col_names, new_col_types = \
+                df_utils._construct_sql_expression_for_aggregations(
+                    col_names, col_types, 'min')
+
+            expr1, new_col_names1, new_col_types1 = \
+                df_utils._construct_sql_expression_for_aggregations(
+                    col_names, col_types, ['min', 'sum'])
+
+            expr2, new_col_names2, new_col_types2 = \
+                df_utils._construct_sql_expression_for_aggregations(
+                    col_names, col_types, {'col1' : ['min', 'sum'],
+                                           'col2' : 'mean'})
+        """
+
+        # e.g. of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
+        #                        FLOAT(precision=0)]
+
+        # e.g. of the types of each column: <class 'teradatasqlalchemy.types.VARCHAR'>,
+        # <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
+        # <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc.
+
+        # If the function is of type time series aggregate, we process the aggregation differently.
+        if not isinstance(func, str):
+            # If func is not an instance of string, that means the function call is
+            # from DataFrame.agg() and is made to process multiple functions.
+            # We process this differently, as we need to map and serialize the
+            # column names and the aggregate functions to operate on.
+            # If we have just one function to be executed on the complete DataFrame, then we
+            # don't need this extra processing. Also, if the call is from DataFrame.agg(), the
+            # time series aggregate check is not required, as special time series aggregate
+            # functions cannot be used in DataFrame.agg().
+            return DataFrameUtils._construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types,
+                                                                                     func, percentile, describe_op,
+                                                                                     **kwargs)
+
+        as_time_series_aggregate = False
+        if "as_time_series_aggregate" in kwargs:
+            as_time_series_aggregate = kwargs["as_time_series_aggregate"]
+
+        if as_time_series_aggregate and func in ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top',
+                                                 'top with ties']:
+            return DataFrameUtils._construct_sql_expression_for_time_series_aggregations(df, column_names,
+                                                                                         column_types, func,
+                                                                                         **kwargs)
+
+        tdp = preparer(td_dialect)
+
+        # This variable is used to decide whether the DataFrame has all columns unsupported
+        # for the provided operations.
+        all_unsupported_columns = True
+        valid_columns = []
+        invalid_columns = []
+        new_column_names = []
+        new_column_types = []
+        for column in column_names:
+            column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
+                DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
+                                                                     describe_op=describe_op, percentile=percentile,
+                                                                     tdp=tdp, **kwargs)
+
+            if column_supported:
+                all_unsupported_columns = False
+                new_column_names.append(new_column_name)
+                new_column_types.append(new_column_type)
+                valid_columns.append(column_aggr_expr)
+            else:
+                invalid_columns.append("({0} - {1})".format(column, df[column].type))
+
+        if all_unsupported_columns:
+            error_msgs = []
+            invalid_columns.sort()  # Helps in catching the columns in lexicographic order
+            error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(", ".join(invalid_columns),
+                                                                           func)
+            error_msgs.append(error)
+
+            if len(valid_columns) == 0:  # No supported columns in the given list of columns
+                raise TeradataMlException(Messages.get_message(
+                    MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
+                    MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+
+        # Quote column names that match Teradata reserved keywords.
+        quote_column_name = [UtilFuncs._process_for_teradata_keyword(col) for col in column_names]
+
+        # Actual columns should be retained if "drop_columns" is set to False.
+        if kwargs.get("drop_columns") is False:
+            valid_columns = quote_column_name + valid_columns
+            new_column_names = column_names + new_column_names
+            new_column_types = column_types + new_column_types
+
+        aggregate_expr = ", ".join(valid_columns)
+        return aggregate_expr, new_column_names, new_column_types
+
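+    # Illustrative output, following the docstring example (numeric columns
+    # col1 and col2, func='min'):
+    #   aggregate_expr   -> 'min(col1) as min_col1, min(col2) as min_col2'
+    #   new_column_names -> ['min_col1', 'min_col2']
+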
+    @staticmethod
+    def _construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types, func, percentile=.5,
+                                                           describe_op=False, **kwargs):
+        """
+        Internal function to create and return the sql expression
+        corresponding to the given operation, column_names and
+        column_types.
+
+        Column_types are used to check whether all the datatypes are
+        valid types for the given operation, and an exception is thrown
+        if they are not.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame which is to be used to get the
+                desired aggregate column expression.
+                Types: teradataml DataFrame
+
+            column_names:
+                Required Argument.
+                Specifies the column names for which the desired aggregate operation
+                is to be executed.
+                Types: List of strings
+
+            column_types:
+                Required Argument.
+                Specifies the respective column types for the column names.
+                Types: List of teradatasqlalchemy types
+
+            func:
+                Required Argument.
+                Specifies the aggregate function(s) to be applied on teradataml
+                DataFrame columns.
+                Types: string, dictionary or list of strings/functions.
+                Accepted combinations are:
+                1. String function name
+                2. List of functions
+                3. Dictionary containing column name as key and aggregate
+                   function name (string or list of strings) as value
+
+            percentile:
+                Optional Argument.
+                Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
+                The default is .5, which returns the 50th percentile.
+                Types: float
+
+            describe_op:
+                Optional Argument.
+                Specifies a boolean flag that decides whether the aggregate operation being
+                performed is for DataFrame.describe() or not.
+                Types: bool
+
+            kwargs:
+                Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
+
+        RETURNS:
+            a) sql expression such as
+               1. 'min(col1) as min_col1, min(col2) as min_col2' if
+                  col1 and col2 are the columns in the DataFrame and the
+                  operation is 'min'
+               2. 'max(col1) as max_col1, max(col2) as max_col2' if
+                  col1 and col2 are the columns in the DataFrame and the
+                  operation is 'max'
+               3. 'min(col1) as min_col1, stddev_samp(col2) as
+                  std_col2' if col1, col2 are the columns in the
+                  DataFrame and the operations are min, std.
+               etc...
+            b) new columns' names (e.g. min_col1, min_col2 ...)
+            c) new columns' types
+        RAISES:
+            TeradataMLException
+            1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
+               aggregate operations do not support the specified columns.
+
+               Possible Value :
+               No results. Below is/are the error message(s):
+               All selected columns [(col1 - VARCHAR)] is/are
+               unsupported for 'sum' operation.
+
+            2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
+               operation(s) received in parameter 'func' is/are
+               invalid.
+
+               Possible Value :
+               Invalid aggregate operation(s): minimum, counter.
+               Valid aggregate operation(s): count, max, mean, min,
+               std, sum.
+
+            3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
+               specified in func is not present in the dataframe.
+
+               Possible Value :
+               Invalid column(s) given in parameter func: col1.
+               Valid column(s) : A, B, C, D.
+
+        EXAMPLES:
+            col_names, col_types = \
+                df_utils._get_column_names_and_types_from_metaexpr(
+                    self._metaexpr)
+            expr, new_col_names, new_col_types = \
+                df_utils._construct_sql_expression_for_aggregations_for_agg(
+                    col_names, col_types, 'min')
+
+            expr1, new_col_names1, new_col_types1 = \
+                df_utils._construct_sql_expression_for_aggregations_for_agg(
+                    col_names, col_types, ['min', 'sum'])
+
+            expr2, new_col_names2, new_col_types2 = \
+                df_utils._construct_sql_expression_for_aggregations_for_agg(
+                    col_names, col_types, {'col1' : ['min', 'sum'],
+                                           'col2' : 'mean'})
+        """
+        # If the function is of type time series aggregate, we process the aggregation differently.
+        # Also, one is not supposed to pass the time series aggregates below to DataFrame.agg():
+        # ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top', 'top with ties']
+        # Thus, no extra processing is required for time series aggregates over here.
+
+        # 'operations' contains a dict of columns -> list of aggregate operations.
+        operations = DataFrameUtils._validate_agg_function(func, column_names)
+
+        all_valid_columns = []
+        all_invalid_columns = {}
+        all_new_column_names = []
+        all_new_column_types = []
+
+        # For each column, the value is True if there is at least one valid operation
+        # (an operation on a valid datatype).
+        column_supported = {}
+        tdp = preparer(td_dialect)
+        for column in operations:
+            column_supported[column] = False
+            valid_columns = []
+            invalid_columns = {}
+            new_column_names = []
+            new_column_types = []
+            for operation in operations[column]:
+                is_colop_supported, new_col, new_coltype, column_aggr_expr, invalid_column_info = \
+                    DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=operation,
+                                                                         describe_op=describe_op,
+                                                                         percentile=percentile,
+                                                                         tdp=tdp, **kwargs)
+                if is_colop_supported:
+                    column_supported[column] = is_colop_supported
+                    new_column_names.append(new_col)
+                    new_column_types.append(new_coltype)
+                    valid_columns.append(column_aggr_expr)
+                else:
+                    if operation in invalid_columns:
+                        invalid_columns[operation].append(invalid_column_info)
+                    else:
+                        invalid_columns[operation] = [invalid_column_info]
+
+            all_valid_columns.extend(valid_columns)
+            all_new_column_names.extend(new_column_names)
+            all_new_column_types.extend(new_column_types)
+
+            for operation in invalid_columns:
+                if operation in all_invalid_columns:
+                    all_invalid_columns[operation].extend(invalid_columns[operation])
+                else:
+                    all_invalid_columns[operation] = invalid_columns[operation]
+
+        unsupported_columns = [col for col in column_supported if not column_supported[col]]
+        unsupported_columns.sort()  # Helps in catching the columns in lexicographic order
+
+        error_msgs = []
+        for operation in sorted(all_invalid_columns):
+            all_invalid_columns[operation].sort()  # Helps in catching the columns in lexicographic order
+            error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(
+                ", ".join(all_invalid_columns[operation]), operation)
+            error_msgs.append(error)
+
+        if not all(column_supported[col] for col in column_supported):
+            new_msg = MessageCodes.TDMLDF_AGGREGATE_AGG_DICT_ERR.value.format(", ".join(unsupported_columns))
+            error_msgs.append(new_msg)
+            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+
+        elif len(all_valid_columns) == 0:  # No supported columns in the given list of columns
+            raise TeradataMlException(Messages.get_message(
+                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
+                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+
+        aggregate_expr = ", ".join(all_valid_columns)
+        return aggregate_expr, all_new_column_names, all_new_column_types
+
+    @staticmethod
+    def _construct_sql_expression_for_time_series_aggregations(df, column_names, column_types, func, **kwargs):
+        """
+        Internal function to create and return the sql expression
+        corresponding to the given time series function, column_names and
+        column_types.
+
+        Column_types are used to check whether all the datatypes are
+        valid types for the given operation, and an exception is thrown
+        if they are not.
+
+        NOTE:
+            This function should be used only for time series aggregates.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame which is to be used to get the
+                desired aggregate column expression.
+                Types: teradataml DataFrame
+
+            column_names:
+                Required Argument.
+                Specifies the column names for which the desired aggregate operation
+                is to be executed.
+                Types: List of strings
+
+            column_types:
+                Required Argument.
+                Specifies the respective column types for the column names.
+                Types: List of teradatasqlalchemy types
+
+            func:
+                Required Argument.
+                Specifies the aggregate function(s) to be applied on teradataml
+                DataFrame columns. For time series aggregates it is usually a string.
+                Types: str
+
+            kwargs:
+                Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
+
+        RETURNS:
+            a) sql expression such as
+               1. 'bottom(2, "col1") as "bottom2col1"' if col1 is a column in the
+                  DataFrame, the operation is 'bottom' and the number of values is 2
+               etc...
+            b) new columns' names (e.g. bottom2col1 ...)
+            c) new columns' types
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            colname_to_numvalues = {"col1" : 2, "col2": 3}
+            kwargs = {"colname_to_numvalues": colname_to_numvalues}
+            aggregate_expr, column_names, column_types = \
+                df_utils._construct_sql_expression_for_time_series_aggregations(df, column_names, column_types,
+                                                                                func, **kwargs)
+        """
+
+        # e.g. of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
+        #                        FLOAT(precision=0)]
+
+        # e.g. of the types of each column: <class 'teradatasqlalchemy.types.VARCHAR'>,
+        # <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
+        # <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc.
+
+        col_names_and_types = dict(zip(column_names, column_types))
+        tdp = preparer(td_dialect)
+
+        select_columns = []
+        new_column_names = []
+        new_column_types = []
+        if func in ["bottom", "bottom with ties", "top", "top with ties"]:
+            # Processing for bottom and top.
+            # Function name to be used in column aliasing.
+            column_alias_func = func.replace(" ", "_")
+            bottom_col_val = kwargs["colname_to_numvalues"]
+            for column in sorted(list(bottom_col_val.keys())):
+                new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
+                quoted_parent_column_name = tdp.quote("{0}".format(column))
+                quoted_new_column_name = tdp.quote(new_col_name)
+                select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
+                                                                    quoted_parent_column_name,
+                                                                    quoted_new_column_name))
+                new_column_names.append(new_col_name)
+                new_column_types.append(col_names_and_types[column])
+
+        if func == "delta_t":
+            # Argument processing for DELTA_T.
+            new_column_names.append("delta_t_td_timecode")
+            quoted_new_column_name = tdp.quote(new_column_names[0])
+            new_column_types.append(PERIOD_TIMESTAMP)
+            select_columns.append("{0}((WHERE {1}), (WHERE {2})) as {3}".format(func, kwargs["start_condition"],
+                                                                                kwargs["end_condition"],
+                                                                                quoted_new_column_name))
+
+        if func == 'mad':
+            # Processing for Median Absolute Deviation.
+            # Function name to be used in column aliasing.
+            column_alias_func = func.replace(" ", "_")
+            bottom_col_val = kwargs["colname_to_numvalues"]
+            for column in sorted(list(bottom_col_val.keys())):
+                new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
+                quoted_parent_column_name = tdp.quote("{0}".format(column))
+                quoted_new_column_name = tdp.quote(new_col_name)
+                select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
+                                                                    quoted_parent_column_name,
+                                                                    quoted_new_column_name))
+                new_column_names.append(new_col_name)
+                if type(col_names_and_types[column]) in [DECIMAL, NUMBER]:
+                    # If the column type is DECIMAL or NUMBER, then the output column type should
+                    # also be the same. Otherwise, it is FLOAT.
+                    new_column_types.append(col_names_and_types[column])
+                else:
+                    new_column_types.append(FLOAT())
+
+        if "default_constant_for_columns" in kwargs:
+            column_names = kwargs["default_constant_for_columns"]
+            column_types = [col_names_and_types[column] for column in column_names]
+            if len(column_names) > 0:
+                aggregate_expr, all_new_column_names, all_new_column_types = \
+                    DataFrameUtils._construct_sql_expression_for_aggregations(df=df, column_names=column_names,
+                                                                              column_types=column_types, func=func)
+                aggregate_expr_default_column_list = [col.strip() for col in aggregate_expr.split(",")]
+                select_columns = select_columns + aggregate_expr_default_column_list
+                new_column_names = new_column_names + all_new_column_names
+                new_column_types = new_column_types + all_new_column_types
+
+        aggregate_expr = ", ".join(select_columns)
+        return aggregate_expr, new_column_names, new_column_types
+
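+    # Illustrative DELTA_T expression built above (hypothetical conditions
+    # start_condition='temperature > 30', end_condition='temperature < 25'):
+    #   delta_t((WHERE temperature > 30), (WHERE temperature < 25)) as "delta_t_td_timecode"
+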
+    @staticmethod
+    def _invalid_describe_column(df, columns, metaexpr, groupby_column_list):
+        """
+        Internal function to validate whether the columns provided to describe() are
+        correct or not, when the DataFrame is the output of groupby or groupby_time.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame we are collecting statistics for.
+                Types: teradataml DataFrame
+
+            columns:
+                Optional Argument.
+                Specifies the name(s) of the columns we are collecting statistics for.
+                Types: str or List of strings (str)
+
+            metaexpr:
+                Required Argument.
+                Specifies the meta expression for the dataframe.
+                Types: _MetaExpression
+
+            groupby_column_list:
+                Optional Argument.
+                Specifies the group by columns for the dataframe.
+                Default Values: None.
+                Types: str or List of strings (str)
+
+        RETURNS:
+            None
+
+        RAISES:
+            TeradataMLException
+        """
+        invalid_columns = [_column for _column in groupby_column_list if columns is not None
+                           and _column in columns]
+        if len(invalid_columns) > 0:
+            all_columns = [col.name for col in metaexpr.c]
+            valid_columns = [item for item in all_columns if item not in groupby_column_list]
+            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN). \
+                format(", ".join(invalid_columns), 'columns', ", ".join(valid_columns))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN)
+
+    @staticmethod
+    def _construct_describe_query(df, columns, metaexpr, percentiles, function_label, groupby_column_list=None,
+                                  include=None, is_time_series_aggregate=False, verbose=False, distinct=False,
+                                  statistics=None, **kwargs):
+        """
+        Internal function to create the sql query for describe().
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame we are collecting statistics for.
+                Types: teradataml DataFrame
+
+            columns:
+                Optional Argument.
+                Specifies the name(s) of the columns we are collecting statistics for.
+                Types: str or List of strings (str)
+
+            metaexpr:
+                Required Argument.
+                Specifies the meta expression for the dataframe.
+                Types: _MetaExpression
+
+            percentiles:
+                Required Argument.
+                Specifies a list of values between 0 and 1.
+                Types: List of floats
+
+            function_label:
+                Required Argument.
+                Specifies a string value used as the label for the aggregate function column.
+                Types: str
+
+            groupby_column_list:
+                Optional Argument.
+                Specifies the group by columns for the dataframe.
+                Default Values: None.
+                Types: str or List of strings (str)
+
+            include:
+                Optional Argument.
+                Specifies a string that must be "all" or None. If "all", then all columns will
+                be included. Otherwise, only numeric columns are used for collecting statistics.
+                Default Values: None.
+                Types: str
+
+            is_time_series_aggregate:
+                Optional Argument.
+                Specifies a flag stating whether the describe operation is a time series
+                aggregate or not.
+                Default Values: False.
+                Types: bool
+
+            verbose:
+                Optional Argument.
+                Specifies a flag stating whether the DESCRIBE VERBOSE option for time series
+                aggregates is to be performed or not.
+                Default Values: False.
+                Types: bool
+
+            distinct:
+                Optional Argument.
+                Specifies a flag that decides whether to consider duplicate rows in the
+                calculation or not.
+                Default Values: False
+                Types: bool
+
+            statistics:
+                Optional Argument.
+                Specifies the aggregate operation(s) to use in place of the default set of
+                operators. Considered only when 'include' is None.
+                Default Values: None.
+                Types: list of str
+
+            kwargs:
+                Optional Arguments.
+                Keyword arguments for time series aggregate functions.
+
+        RETURNS:
+            A SQL query like:
+            select 'count' as "func", cast(count("Feb") as Number) as "Feb", cast(count(accounts) as Number) as accounts from "PYUSER"."salesview"
+            union all
+            select 'mean' as "func", cast(avg("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
+            union all
+            select 'std' as "func", cast(stddev_samp("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
+            union all
+            select 'min' as "func", cast(min("Feb") as Number) as "Feb", cast(min(accounts) as Number) as accounts from "PYUSER"."salesview"
+            union all
+            select '25%' as "func", percentile_cont(0.25) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
+            union all
+            select '50%' as "func", percentile_cont(0.5) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
+            union all
+            select '75%' as "func", percentile_cont(0.75) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
+            union all
+            select 'max' as "func", cast(max("Feb") as Number) as "Feb", cast(max(accounts) as Number) as accounts from "PYUSER"."salesview"
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            agg_query = \
+                df_utils._construct_describe_query(self, None, self._metaexpr, [.25, .5, .75], "func",
+                                                   self.groupby_column_list)
+            agg_query = \
+                df_utils._construct_describe_query(self, None, self._metaexpr, [.3, .6], "func",
+                                                   self.groupby_column_list, include="all")
+        """
+        table_name = df._table_name
+        operators = ["count", "mean", "std", "min", "percentile", "max"]
+        all_operators = ["count", "unique", "mean", "std", "min", "percentile", "max"]
+
+        if is_time_series_aggregate and verbose:
+            # Time series aggregate operators for the Vantage DESCRIBE function with verbose.
+            operators = ['max', 'mean', 'median', 'min', 'mode', "percentile", 'std']
+        elif is_time_series_aggregate and not verbose:
+            # Time series aggregate operators for the Vantage DESCRIBE function.
+            operators = ['max', 'mean', 'min', 'std']
+
+        col_names = []
+        col_types = []
+        sel_agg_stmts = []
+        tdp = preparer(td_dialect)
+        quoted_function_label = tdp.quote(function_label)
+
+        if include is not None and include == 'all' and not is_time_series_aggregate:
+            operators = all_operators
+
+        if include is None and statistics is not None:
+            operators = statistics
+
+        table_name, sel_groupby, groupby = DataFrameUtils()._process_groupby_clause(table_name, groupby_column_list,
+                                                                                    is_time_series_aggregate,
+                                                                                    **kwargs)
+
+        for col in metaexpr.c:
+            if (include is None and type(col.type) in UtilFuncs()._get_numeric_datatypes()) \
+                    or include == 'all' or statistics is not None:
+                if not (groupby is not None and col.name in groupby_column_list):
+                    if columns is None or col.name in columns:
+                        col_names.append(col.name)
+                        col_types.append(col.type)
+
+        if len(col_names) == 0:
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR,
+                                     "The DataFrame does not contain numeric columns"),
+                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+        for op in operators:
+            if op == "percentile":
+                for p in percentiles:
+                    agg_expr, new_col_names, new_col_types = \
+                        DataFrameUtils._construct_sql_expression_for_aggregations(
+                            df, col_names, col_types, op, percentile=p, describe_op=True, distinct=distinct,
+                            as_time_series_aggregate=is_time_series_aggregate)
+                    sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}%' as varchar(6)) as \"{1}\", {2} from {3} ".format(
+                        int(p*100), quoted_function_label, agg_expr, table_name, sel_groupby))
+            else:
+                agg_expr, new_col_names, new_col_types = \
+                    DataFrameUtils._construct_sql_expression_for_aggregations(
+                        df, col_names, col_types, op, describe_op=True, distinct=distinct,
+                        as_time_series_aggregate=is_time_series_aggregate)
+                sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}' as varchar(6)) as \"{1}\", \n\t{2} \nfrom \n\t{3} ".format(
+                    op, quoted_function_label, agg_expr, table_name, sel_groupby))
+        return " \nunion all\n ".join(sel_agg_stmts)
+
+    @staticmethod
+    def _process_groupby_clause(table_name, groupby_column_list, is_time_series_aggregate, **kwargs):
+        """
+        Internal function used to process and generate the GROUP BY or GROUP BY TIME clause
+        required for the query to be run for the describe operation.
+
+        PARAMETERS:
+            table_name:
+                Required Argument.
+                Specifies the table name to be used for forming the describe query.
+                Types: str
+
+            groupby_column_list:
+                Required Argument.
+                Specifies the list of column names involved in the Group By.
+                Types: List of Strings.
+
+            is_time_series_aggregate:
+                Required Argument.
+                Specifies a boolean stating whether the GROUP BY clause to be formed is for a
+                time series aggregate or not.
+                Types: bool
+
+            kwargs:
+                Optional Arguments.
+                Keyword arguments for time series aggregate functions.
+
+        RETURNS:
+            1. Table name appended with the GROUP BY clause.
+            2. Column projection string for the GROUP BY columns.
+            3. The GROUP BY clause.
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            table_name, sel_groupby, groupby = DataFrameUtils()._process_groupby_clause(table_name, groupby_column_list,
+                                                                                        is_time_series_aggregate, **kwargs)
+        """
+        sel_groupby = ""
+        grp_by_clause = None
+
+        if is_time_series_aggregate:
+            # For a time series aggregate, timebucket_duration is mandatory, so it will always
+            # be present in kwargs.
+            grp_by_clause = "GROUP BY TIME ({0}".format(kwargs['timebucket_duration'])
+
+            # Add columns in the value expression to GROUP BY TIME.
+            if 'value_expression' in kwargs and \
+                    kwargs['value_expression'] is not None and \
+                    len(kwargs['value_expression']) > 0:
+                grp_by_clause = "{0} and {1}".format(grp_by_clause, ", ".join(kwargs['value_expression']))
+
+            # Complete the parenthesis for GROUP BY TIME.
+            grp_by_clause = "{0})".format(grp_by_clause)
+
+            # Add time code column information.
+            if 'timecode_column' in kwargs and \
+                    kwargs['timecode_column'] is not None and \
+                    len(kwargs['timecode_column']) > 0:
+                if 'sequence_column' in kwargs and \
+                        kwargs['sequence_column'] is not None and \
+                        len(kwargs['sequence_column']) > 0:
+                    grp_by_clause = "{0} USING TIMECODE({1}, {2})".format(grp_by_clause, kwargs['timecode_column'],
+                                                                          kwargs['sequence_column'])
+                else:
+                    grp_by_clause = "{0} USING TIMECODE({1})".format(grp_by_clause, kwargs['timecode_column'])
+
+            # Add FILL information.
+            if 'fill' in kwargs and kwargs['fill'] is not None and len(kwargs['fill']) > 0:
+                grp_by_clause = "{0} FILL({1})".format(grp_by_clause, kwargs['fill'])
+
+        else:
+            if groupby_column_list is not None:
+                grp_by_clause = "GROUP BY {0}".format(",".join(groupby_column_list))
+
+        if grp_by_clause is not None:
+            table_name = "{0} \n{1}".format(table_name, grp_by_clause)
+            tdp = preparer(td_dialect)
+            for g in groupby_column_list:
+                if is_time_series_aggregate:
+                    if g == "TIMECODE_RANGE":
+                        g = "$TD_TIMECODE_RANGE"
+
+                    if "GROUP BY TIME" in g:
+                        g = "$TD_GROUP_BY_TIME"
+
+                quoted_name = tdp.quote(g)
+                sel_groupby += "{0}, ".format(quoted_name)
+
+        return table_name, sel_groupby, grp_by_clause
+
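+    # Illustrative clause built above (hypothetical kwargs:
+    # timebucket_duration='MINUTES(10)', timecode_column='tc', fill='NULLS'):
+    #   GROUP BY TIME (MINUTES(10)) USING TIMECODE(tc) FILL(NULLS)
+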
+    @staticmethod
+    def _get_column_names_and_types_from_metaexpr(metaexpr):
+        """
+        Internal function to return column names and respective types
+        given _metaexpr.
+
+        PARAMETERS:
+            metaexpr:
+                Required Argument.
+                Dataframe's metaexpr. It is used to get column names and types.
+                Types: MetaExpression
+
+        RETURNS:
+            Two lists - one for column names and another for column types.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            dfUtils._get_column_names_and_types_from_metaexpr(
+                df._metaexpr)
+        """
+        # Constructing new column names & types for selected columns ONLY, using the parent _metaexpr.
+        col_names = []
+        col_types = []
+        for c in metaexpr.c:
+            col_names.append(c.name)
+            col_types.append(c.type)
+
+        return col_names, col_types
+
+    @staticmethod
+    def _insert_all_from_table(to_table_name, from_table_name, column_list, schema_name,
+                               temporary=False):
+        """
+        Inserts all records from one table into another, using columns ordered by the column list.
+
+        PARAMETERS:
+            to_table_name - String specifying the name of the SQL table to insert to.
+            from_table_name - String specifying the name of the SQL table to insert from.
+            column_list - List of strings specifying the column names used in the insertion.
+            schema_name - Name of the database schema to insert table data into.
+            temporary - Specifies whether to create Vantage tables as permanent or volatile.
+                        Default: False
+                        Note: When True:
+                              1. volatile tables are created, and
+                              2. schema_name is ignored.
+                              When False, permanent tables are created.
+        RETURNS:
+            None
+
+        RAISES:
+            Database error if an error occurred while executing the insert command.
+
+        EXAMPLES:
+            df_utils._insert_all_from_table('table1_name', 'table2_name', ['col1', 'col2', 'col3'],
+                                            'mydb')
+        """
+        tdp = preparer(td_dialect)
+
+        # Construct the INSERT command.
+        column_order_string = ', '.join([tdp.quote("{0}".format(element)) for element in column_list])
+
+        if schema_name:
+            full_to_table_name = tdp.quote(schema_name) + "." + tdp.quote(to_table_name)
+        elif temporary:
+            full_to_table_name = tdp.quote(to_table_name)
+        else:
+            full_to_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
+                to_table_name)
+
+        insert_sql = SQLBundle._build_insert_from_table_query(full_to_table_name, from_table_name, column_order_string)
+
+        # Execute the INSERT command.
+        return UtilFuncs._execute_ddl_statement(insert_sql)
+
+    @staticmethod
+    def _dataframe_has_column(data, column):
+        """
+        Function to check whether the given column name is present in the given DataFrame or not.
+        This function is currently used only for Analytics wrappers.
+
+        PARAMETERS:
+            data - teradataml DataFrame to check against for column existence.
+            column - Column name (a string).
+
+        RETURNS:
+            True if the column is present in the DataFrame, otherwise False.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            DataFrameUtils._dataframe_has_column(data, col)
+        """
+        return column in [c.name for c in data._metaexpr.c]
+
+    @staticmethod
+    def _get_row_count(table_name):
+        """
+        Function to return the row count of a teradataml DataFrame.
+        This function is currently used to determine the shape/size of a dataframe.
+
+        PARAMETERS:
+            table_name - Name of the table to get the row count for.
+
+        RAISES:
+            TeradataMlException (TDMLDF_INFO_ERROR)
+
+        EXAMPLES:
+            DataFrameUtils._get_row_count(table_name)
+        """
+        # Construct the COUNT(*) query.
+        try:
+            row_count_query = SQLBundle._build_nrows_print_query(table_name)
+            res = execute_sql(row_count_query)
+            return res.fetchone()[0]
+
+        except TeradataMlException:
+            raise
+
+        except Exception as err:
+            # TODO Better handle the level of information being presented to the user with logging
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
+
+    @staticmethod
+    def _get_scalar_value(table_name):
+        """
+        Function to return the only 1x1 (scalar) value from a teradataml DataFrame.
+
+        PARAMETERS:
+            table_name - Name of the table to get the value from.
+
+        RAISES:
+            TeradataMlException (TDMLDF_INFO_ERROR)
+
+        EXAMPLES:
+            DataFrameUtils._get_scalar_value(table_name)
+        """
+        # Construct the base query.
+        try:
+            select_query = SQLBundle._build_base_query(table_name)
+            res = execute_sql(select_query)
+            return res.fetchone()[0]
+
+        except TeradataMlException:
+            raise
+
+        except Exception as err:
+            # TODO Better handle the level of information being presented to the user with logging
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
+
+    @staticmethod
+    def _get_sorted_nrow(df, n, sort_col, asc=True):
+        """
+        Internal utility function that returns a teradataml DataFrame containing n rows
+        of the DataFrame. The DataFrame is sorted on the index column, or the first column
+        if there is no index column.
+
+        PARAMETERS:
+            df: teradataml DataFrame
+            n: Specifies the number of rows to select.
+               Type: int
+            sort_col: The column to sort on.
+                      Type: str
+            asc: (optional) - Specifies the sort order.
+                 If True, sort in ascending order.
+                 If False, sort in descending order.
+                 The default value is True.
+                 Type: boolean
+
+        RETURNS:
+            teradataml DataFrame
+
+        EXAMPLES:
+            DataFrameUtils._get_sorted_nrow(df, 10, 'col1')
+            DataFrameUtils._get_sorted_nrow(df, 20, 'col1', asc=True)
+            DataFrameUtils._get_sorted_nrow(df, 30, 'col1', asc=False)
+        """
+        # TODO: implement and use this in teradatasqlalchemy
+        tdp = preparer(td_dialect)
+        aed_utils = AedUtils()
+
+        sort_order = "asc"
+        if not asc:
+            sort_order = "desc"
+
+        quoted_cols = [tdp.quote(c) for c in df.columns]
+        sel_cols_str = ",".join(quoted_cols)
+        sel_row_num = "row_number() over (order by \"{0}\" {1}) - 1 as tdml_row_num, {2}".format(sort_col, sort_order,
+                                                                                                 sel_cols_str)
+        filter_str = "tdml_row_num < {0}".format(n)
+        sel_nodeid = aed_utils._aed_select(df._nodeid, sel_row_num)
+        fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
+        sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
+        col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
+        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types))
+        # Call the function _from_node from the appropriate class, either DataFrame or GeoDataFrame.
+        new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
+        new_df._orderby = df._orderby
+        new_df._metaexpr._n_rows = n
+        return new_df
+
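+    # Illustrative projection generated above (hypothetical sort_col='col1',
+    # n=10, asc=True, and columns col1 and col2):
+    #   row_number() over (order by "col1" asc) - 1 as tdml_row_num, "col1", "col2"
+    # followed by the filter: tdml_row_num < 10
+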
+    @staticmethod
+    def _get_database_names(connection, schema_name):
+        """
+        Function to return a list of valid database names for a given sqlalchemy connection.
+        This function is used to determine whether the database used is valid in user APIs
+        such as copy_to_sql.
+
+        PARAMETERS:
+            connection: Required Argument.
+                        A SQLAlchemy connection object.
+
+            schema_name: Required Argument.
+                         String specifying the requested schema name.
+
+        RAISES:
+            TeradataMlException (TDMLDF_INFO_ERROR)
+
+        EXAMPLES:
+            DataFrameUtils._get_database_names(get_connection(), schema_name)
+        """
+        # TODO: implement and use this in teradatasqlalchemy
+        table_obj = table('databasesV', column('databasename'), schema='dbc')
+        stmt = select(text(str(func.lower(table_obj.c.databasename)) + ' as databasename')).where(
+            text('databasename (NOT CASESPECIFIC) = {} (NOT CASESPECIFIC)'.format(':schema_name')))
+        stmt = text(str(stmt))
+        stmt = stmt.bindparams(schema_name=schema_name)
+        res = connection.execute(stmt).fetchall()
+        return [name.databasename for name in res]
+
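+    # The statement built above is effectively (illustrative rendering):
+    #   SELECT lower(dbc.databasesV.databasename) AS databasename
+    #   FROM dbc.databasesV
+    #   WHERE databasename (NOT CASESPECIFIC) = :schema_name (NOT CASESPECIFIC)
+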
+    @staticmethod
+    def _get_common_parent_df_from_dataframes(dfs):
+        """
+        Internal function to return the common parent DataFrame from a given list of DataFrames.
+        """
+        from teradataml import DataFrame, in_schema
+        aed_utils = AedUtils()
+        if len(dfs) == 1:
+            operation = aed_utils._aed_get_node_query_type(dfs[0]._nodeid)
+            if operation == "table":
+                # Return the same dataframe if it is a DataFrame object from a table.
+                return dfs[0]
+
+            # If it is a select node or any other node, then get the parent node and execute it.
+            pids = aed_utils._aed_get_parent_nodeids(dfs[0]._nodeid)
+            if not aed_utils._aed_is_node_executed(pids[0]):
+                _ = DataFrameUtils._execute_node_return_db_object_name(pids[0])
+
+            tab_name_first = aed_utils._aed_get_source_tablename(pids[0])
+
+            db_schema = UtilFuncs._extract_db_name(tab_name_first)
+            db_table_name = UtilFuncs._extract_table_name(tab_name_first)
+
+            return DataFrame(in_schema(db_schema, db_table_name))
+
+        pids_first = None
+        parent_df = None
+        for i in range(len(dfs)):
+            pids = aed_utils._aed_get_parent_nodeids(dfs[i]._nodeid)
+
+            if parent_df is None:
+                if not aed_utils._aed_is_node_executed(pids[0]):
+                    _ = DataFrameUtils._execute_node_return_db_object_name(pids[0])
+
+                tab_name_first = aed_utils._aed_get_source_tablename(pids[0])
+
+                db_schema = UtilFuncs._extract_db_name(tab_name_first)
+                db_table_name = UtilFuncs._extract_table_name(tab_name_first)
+
+                parent_df = DataFrame(in_schema(db_schema, db_table_name))
+                pids_first = pids
+            else:
+                if pids_first != pids:
+                    raise TeradataMlException(Messages.get_message(MessageCodes.DFS_NO_COMMON_PARENT),
+                                              MessageCodes.DFS_NO_COMMON_PARENT)
+
+        return parent_df