teradataml-20.0.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries, and is provided for informational purposes only. It reflects changes between package versions as they appear in their respective public registries.
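For context, a listing like the one below can be reproduced locally with Python's standard library, since a .whl file is an ordinary ZIP archive. This is a minimal sketch, assuming the wheel has already been downloaded; the local path shown is hypothetical.

    from zipfile import ZipFile

    # Hypothetical local path to the downloaded wheel (a .whl is a ZIP archive).
    WHEEL_PATH = "teradataml-20.0.0.8-py3-none-any.whl"

    with ZipFile(WHEEL_PATH) as wheel:
        for member in wheel.infolist():
            # Print each archived file name with its uncompressed size in bytes.
            print(f"{member.filename}\t{member.file_size}")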
Files changed (1208)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
teradataml/dataframe/dataframe_utils.py
@@ -0,0 +1,2114 @@
+ # -*- coding: utf-8 -*-
+ """
+
+ Unpublished work.
+ Copyright (c) 2018 by Teradata Corporation. All rights reserved.
+ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
+
+ Primary Owner: mark.sandan@teradata.com
+ Secondary Owner:
+
+ This file implements util functions of data frame.
+ """
+
+ import numbers
+ import re
+ import pandas as pd
+ import json
+ from collections import OrderedDict
+
+ from teradataml.common.utils import UtilFuncs
+ from teradataml.common.aed_utils import AedUtils
+ from teradataml.common.constants import AEDConstants, PTITableConstants, \
+     SQLPattern, PythonTypes, TeradataConstants, SQLConstants
+ from teradataml.common.sqlbundle import SQLBundle
+ from teradataml.common.exceptions import TeradataMlException
+ from teradataml.common.messages import Messages
+ from teradataml.common.messagecodes import MessageCodes
+
+ from teradataml.context.context import get_context, get_connection
+ from teradataml.context.context import _get_current_databasename
+ from teradataml.dbutils.dbutils import _execute_query_and_generate_pandas_df
+
+ from teradataml.options.display import display
+ from teradataml.options.configure import configure
+ from teradataml.utils.dtypes import _Dtypes, _DtypesMappers
+ from teradataml.utils.utils import execute_sql
+
+ from teradatasqlalchemy.types import FLOAT, NUMBER, DECIMAL, PERIOD_TIMESTAMP
+ from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
+ import teradatasqlalchemy.types as tdsqlalchemy
+ import teradataml.dataframe as tdmldf
+ from teradataml.dataframe.sql_interfaces import ColumnExpression
+
+ from sqlalchemy.sql import select
+ from sqlalchemy.sql.expression import text
+ from sqlalchemy import table, column, func
+ from datetime import datetime, date, time
+ from decimal import Decimal
+
+ # TODO - Need to write unit testcases for these functions
+ class DataFrameUtils():
+
+     @staticmethod
+     def _execute_node_return_db_object_name(nodeid, metaexpression = None):
+         """
+         Fetches queries and view names from the AED node and creates views from the queries.
+         Additionally inspects the metaexpression for consistency.
+
+         PARAMETERS:
+             nodeid: nodeid to execute
+             metaexpression: (optional) updated _metaexpr to validate
+
+         EXAMPLES:
+             _execute_node_return_db_object_name(nodeid)
+             _execute_node_return_db_object_name(nodeid, metaexpr)
+
+         RETURNS:
+             Top level view name.
+
+         """
+         aed_obj = AedUtils()
+         if not aed_obj._aed_is_node_executed(nodeid):
+
+             view_query_node_type_list = aed_obj._aed_get_exec_query(nodeid)
+             view_names, queries, node_query_types, node_ids = view_query_node_type_list
+
+             # Executing Nodes / Creating Views
+             for index in range(len(queries) - 1, -1, -1):
+                 is_persist = False
+                 if metaexpression and metaexpression._is_persist:
+                     is_persist = True
+
+                 try:
+                     if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                         UtilFuncs._create_table(view_names[index], queries[index], volatile=True)
+
+                     elif node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_ML_QUERY_MULTI_OUTPUT.value or \
+                             ("OUT TABLE " in queries[index] and SQLPattern.SQLMR.value.match(queries[index])) or \
+                             is_persist:
+                         # TODO:: OR condition in above needs to be removed once AED support is added.
+                         UtilFuncs._create_table(view_names[index], queries[index])
+
+                     elif node_query_types[index] in ['groupby', 'groupbytime']:
+                         # If query_type is either groupby or groupbytime, get its parent
+                         # nodeid and execute queries for the same.
+                         parent_nodeid = aed_obj._aed_get_parent_nodeids(nodeid)[0]
+                         DataFrameUtils._execute_node_return_db_object_name(parent_nodeid)
+
+                     elif node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_REFERENCE.value:
+                         # Reference nodes - To be ignored.
+                         pass
+
+                     else:
+                         UtilFuncs._create_view(view_names[index], queries[index])
+
+                     # Updating Node Status for executed Node
+                     aed_obj._aed_update_node_state_single(node_ids[index], AEDConstants.AED_NODE_EXECUTED.value)
+
+                 except Exception as emsg:
+                     # TODO:: Append node execution details to emsg.
+                     # Node description, such as nodeType or node operation, should be added
+                     # here in 'emsg' to give away more information about where exactly
+                     # node execution failed.
+                     raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(emsg)),
+                                               MessageCodes.TDMLDF_EXEC_SQL_FAILED)
+
+         # Setting New Table name retrieved to TDML DF
+         result_table_view_name = aed_obj._aed_get_tablename(nodeid)
+         # validate the metaexpression
+         if configure._validate_metaexpression:
+             DataFrameUtils._validate_metaexpression(result_table_view_name, metaexpression)
+
+         return result_table_view_name
+
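The loop above walks the pending queries in reverse so that dependencies (views built on other views) are created bottom-up. A minimal standalone sketch of that ordering idea, with hypothetical view names:

```python
# Toy pending-query list; names and SQL are hypothetical.
queries = [
    ("v_top", "SELECT * FROM v_mid"),   # depends on v_mid
    ("v_mid", "SELECT * FROM v_base"),  # depends on v_base
    ("v_base", "SELECT 1 AS c1"),       # no dependencies
]

# Iterating from the last element to the first creates v_base, then v_mid,
# then v_top, so every view's dependencies already exist when it is created.
for index in range(len(queries) - 1, -1, -1):
    name, sql = queries[index]
    print("CREATE VIEW {} AS {}".format(name, sql))
```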
+     @staticmethod
+     def _validate_metaexpression(result_table_view_name, metaexpression):
+         """
+         Inspects the metaexpression for consistency with the underlying table/view.
+
+         PARAMETERS:
+             result_table_view_name: a string representing the table/view name to check column metadata
+             metaexpression: the metaexpr of the DataFrame to compare against the result_table_view_name
+
+         EXAMPLES:
+             _validate_metaexpression('t1', df._metaexpr)
+
+         RETURNS:
+             None
+             Raises a RuntimeError if mismatches are found.
+
+         """
+         # metaexpression should have already been updated
+         if metaexpression is not None:
+
+             name = lambda x: x[0]
+             type_ = lambda x: x[1]
+
+             # compare sorted by name of column
+             df = sorted(UtilFuncs._describe_column(DataFrameUtils._get_metadata_from_table(result_table_view_name)), key = lambda x: x[0])
+             meta = sorted(metaexpression.c, key = lambda x: x.name)
+
+             # check length
+             if len(df) == len(meta):
+                 for i in range(len(df)):
+
+                     # map Teradata type to python type
+                     meta_type = UtilFuncs._teradata_type_to_python_type(meta[i].type)
+
+                     # compare column names and types
+                     if meta[i].name != name(df[i]) or meta_type != type_(df[i]):
+                         err_msg = "[Mismatch when checking %s]\n\t[Table/View] %s %s\n\t[MetaExpression] %s %s (mapped from => %s)\n"
+                         raise RuntimeError(err_msg % (result_table_view_name,
+                                                       name(df[i]), type_(df[i]),
+                                                       meta[i].name, meta_type, meta[i].type))
+             else:
+                 err_msg = "[Length mismatch when checking %s]\nSource Table/View has length %s but MetaExpression has length %s"
+                 raise RuntimeError(err_msg % (result_table_view_name, len(df), len(meta)))
+
+     @staticmethod
+     def _get_dataframe_print_string(table_name, index_label, orderby=None, undropped_index=None):
+         """
+         Builds string output for a teradataml DataFrame.
+
+         PARAMETERS:
+             table_name - Name of the database table to read from.
+             index_label - String/List specifying column to use as index.
+             orderby - order expression to sort returned rows
+
+         EXAMPLES:
+             _get_dataframe_print_string('table_name', None, None)
+
+         RETURNS:
+             String representation of a pandas DataFrame.
+
+         """
+         read_query = SQLBundle._build_top_n_print_query(table_name, display.max_rows, orderby)
+
+         if index_label is not None:
+             pandas_df = _execute_query_and_generate_pandas_df(read_query, index=index_label)
+         else:
+             pandas_df = _execute_query_and_generate_pandas_df(read_query)
+
+         return pandas_df.to_string()
+
+     @staticmethod
+     def _get_pprint_dtypes(column_names_and_types, null_count=False):
+         """
+         Returns a string containing the column names and types.
+         If null_count is True, the string will also contain
+         the number of non-null values for each column.
+
+         PARAMETERS:
+             column_names_and_types - List of column names and types.
+             null_count (optional)  - When True, each entry of column_names_and_types
+                                      also carries the non-null count for the column,
+                                      which is included in the output.
+
+         EXAMPLES:
+             >>> print(_get_pprint_dtypes(column_names_and_types))
+             accounts      str
+             Feb         float
+             Jan           int
+             Mar           int
+             Apr           int
+             datetime      str
+
+             >>> print(_get_pprint_dtypes(column_names_and_types, null_count=True))
+             accounts    3 non-null str
+             Feb         3 non-null float
+             Jan         3 non-null int
+             Mar         3 non-null int
+             Apr         3 non-null int
+             datetime    3 non-null str
+
+         RAISES:
+
+         """
+         col_names = [i[0] for i in column_names_and_types]
+         col_types = [i[1] for i in column_names_and_types]
+         max_col_names = len(max(col_names, key=len)) + 4
+         max_col_types = len(max(col_types, key=len))
+         dtypes_string = ""
+         if not null_count:
+             for colname, coltype in column_names_and_types:
+                 dtypes_string += "{0: <{name_width}}{1: >{type_width}}\n".format(colname, coltype,
+                                                                                  name_width=max_col_names,
+                                                                                  type_width=max_col_types)
+         else:
+             null_count = [i[2] for i in column_names_and_types]
+             max_null_count = len(str(max(null_count, key=len)))
+             for colname, coltype, num_nulls in column_names_and_types:
+                 dtypes_string += "{0: <{name_width}}{1: <{count_width}} non-null {2: <{type_width}}\n".format(colname,
+                                                                                                               num_nulls,
+                                                                                                               coltype,
+                                                                                                               name_width=max_col_names,
+                                                                                                               count_width=max_null_count,
+                                                                                                               type_width=max_col_types)
+         # Remove last new line character.
+         dtypes_string = dtypes_string[:-1]
+         return dtypes_string
+
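The alignment above relies on Python format specs with nested width fields. A self-contained sketch of the same technique on toy data:

```python
# Standalone sketch of the width-driven alignment used above (toy data).
rows = [("accounts", "str"), ("Feb", "float"), ("Jan", "int")]
name_width = max(len(n) for n, _ in rows) + 4   # widest name plus padding
type_width = max(len(t) for _, t in rows)

for name, dtype in rows:
    # '<' left-aligns the name, '>' right-aligns the type; widths are nested fields.
    print("{0: <{nw}}{1: >{tw}}".format(name, dtype, nw=name_width, tw=type_width))
```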
+     @staticmethod
+     def _get_metadata_from_table(table_name):
+         """
+         Retrieves column metadata by executing a HELP COLUMN command.
+
+         PARAMETERS:
+             table_name - The table name or view name.
+
+         RETURNS:
+             Returns the result set (column information) from HELP COLUMN.
+
+         RAISES:
+             Database error if an error occurred while executing the HELP COLUMN.
+
+         EXAMPLES:
+             df = DataFrame.from_table('mytab')
+             metadata = _get_metadata_from_table(df._table_name)
+         """
+         # Construct HELP COLUMN command.
+         help_col_sql = SQLBundle._build_help_column(table_name)
+         # Execute HELP COLUMN command.
+         return UtilFuncs._execute_query(help_col_sql)
+
+     @staticmethod
+     def _extract_select_string(select_expression):
+         """
+         Takes in a string/list representing a Pandas selection clause of any of the forms (only):
+             a) "col1" or 'col1'
+             b) ["col 1"] or ['col 1']
+             c) ["col1", "col2", "col3"] or ['col1', 'col2', 'col3']
+             d) [['col1', 'col2', 'col3']] or [["col1", "col2", "col3"]]
+
+         And returns a list with column strings representing the selection of the form:
+             a) ['col1']
+             b) ['col 1']
+             c) ['col1','col2','col3']
+             d) ['col1','col2','col3']
+
+         Column names ("col1", "col2", ...) are strings representing database table columns.
+         All standard Teradata data types for columns are supported: INTEGER, VARCHAR(5), FLOAT.
+
+         PARAMETERS:
+             select_expression - Expression representing column selection.
+                 Type - String or List of Strings or List of List (Single level only)
+                 Required - Yes
+
+         EXAMPLES:
+             DataFrameUtils._extract_select_string([['col1', 'col2']])
+             DataFrameUtils._extract_select_string("col1")
+             DataFrameUtils._extract_select_string(["col1"])
+             DataFrameUtils._extract_select_string(["col1","col2","col3"])
+
+         RETURNS:
+             List of Strings representing column names.
+
+         RAISES:
+             TeradataMlException
+         """
+         tdp = preparer(td_dialect)
+         column_list = []
+
+         # Single String column
+         if isinstance(select_expression, str):
+             # Error handling - Empty String
+             if select_expression == "":
+                 raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
+                                           MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
+             else:
+                 column_list.append(tdp.quote("{0}".format(select_expression.strip())))
+
+         # Error Handling - [], [""], [None], ["None"], ['col1', None], ['col1', '']
+         elif isinstance(select_expression, list) and (len(select_expression) == 0 or
+                 any(element in [None, "None", ""] for element in select_expression)):
+             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
+                                       MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
+
+         # List - ["col1"] or ["col1", "col2", "col3"]
+         elif isinstance(select_expression, list) and all(isinstance(element, str) for element in select_expression):
+             if len(select_expression) == 1:
+                 column_list.append(tdp.quote("{0}".format(select_expression[0].strip())))
+             else:
+                 column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression]
+
+         # List of List (Single level only - Pandas Syntax) - [["col1", "col2", "col3"]]
+         elif isinstance(select_expression, list) and isinstance(select_expression[0], list):
+             # Error Handling - [[]], [[""]], [[None]], [['col1', None]], [['col1', "None"]], ["col1", ""]
+             if len(select_expression[0]) == 0 or any(element in [None, "None", ""] for element in select_expression[0]):
+                 raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
+                                           MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
+
+             else:
+                 column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression[0]]
+
+         # Any other Format - Raise Format Exception
+         else:
+             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_FORMAT),
+                                       MessageCodes.TDMLDF_SELECT_INVALID_FORMAT)
+         return column_list
+
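A hedged sketch of the accepted input shapes and the flat lists they normalize to, assuming teradataml is installed; exact quoting depends on the dialect's identifier preparer, which quotes only where required (e.g. embedded spaces):

```python
from teradataml.dataframe.dataframe_utils import DataFrameUtils

# Each accepted shape normalizes to a flat list of column names.
DataFrameUtils._extract_select_string("col1")                 # e.g. ['col1']
DataFrameUtils._extract_select_string(["col 1"])              # e.g. ['"col 1"']
DataFrameUtils._extract_select_string(["col1", "col2"])       # e.g. ['col1', 'col2']
DataFrameUtils._extract_select_string([["col1", "col2"]])     # e.g. ['col1', 'col2']
```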
+     @staticmethod
+     def _get_primary_index_from_table(table_name):
+         """
+         Retrieves the primary index by executing a HELP INDEX command.
+
+         PARAMETERS:
+             table_name - The table name or volatile table name.
+
+         RETURNS:
+             Returns a list containing the primary index columns from HELP INDEX.
+             If there is no primary index (NoPI table), then returns None.
+
+         RAISES:
+             Database error if an error occurred while executing the HELP INDEX.
+
+         EXAMPLES:
+             df = DataFrame.from_table('mytab')
+             index_labels = DataFrameUtils._get_primary_index_from_table(df._table_name)
+         """
+         # Construct HELP INDEX command.
+         help_index_sql = SQLBundle._build_help_index(table_name)
+
+         # Execute HELP INDEX command.
+         rows = UtilFuncs._execute_query(help_index_sql)
+         index_labels = []
+         for row in rows:
+             # row[1] specifies whether the Index is 'Primary or Secondary?'
+             if row[1].rstrip() == 'P':
+                 # row[2] specifies a string of comma separated column names that form the primary index
+                 if "," in row[2]:
+                     index_cols = row[2].split(',')
+                 else:
+                     index_cols = [row[2]]
+                 for index_col in index_cols:
+                     # Since the TD_TIMEBUCKET column in PTI tables is not functionally available, it can be ignored
+                     # from the index information as well (else a warning is generated by SQLAlchemy).
+                     # row[12] corresponds to the 'Timebucket' column in the results of the 'help index' SQL command,
+                     # which is available only when the version supports PTI tables.
+                     if index_col == PTITableConstants.TD_TIMEBUCKET.value and len(row) > 11 and row[12] is not None:
+                         continue
+                     else:
+                         index_labels.append(index_col)
+
+         if len(index_labels) > 0:
+             return index_labels
+         else:
+             return None
+
+     @staticmethod
+     def __validate_sort_type_raise_exception(sort_col_type):
+         """
+         Function to raise a TeradataMlException when sort column values do not
+         match the expected "sort_col_type" in the "_validate_sort_col_type" function.
+
+         PARAMETERS:
+             sort_col_type: The sort column type.
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
+             df_utils.__validate_sort_type_raise_exception(PythonTypes.PY_STRING_TYPE.value)
+         """
+         msg = Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE).format(sort_col_type)
+         raise TeradataMlException(msg, MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE)
+
+     @staticmethod
+     def _validate_sort_col_type(sort_col_type, sort_col_values):
+         """
+         Validates a list of sort column values with the sort column type.
+
+         PARAMETERS:
+             sort_col_type - The sort column type.
+             sort_col_values - A single value or list-like values.
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
+             df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, ["Jan", "Feb"])
+             df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, "Jan")
+             df_utils._validate_sort_col_type(PythonTypes.PY_INT_TYPE.value, [1, 2])
+         """
+         if isinstance(sort_col_values, list):
+             if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
+                 if not all(isinstance(i, str) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
+                 if not all(isinstance(i, float) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
+                 if not all(isinstance(i, Decimal) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
+                 if not all(isinstance(i, datetime) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
+                 if not all(isinstance(i, time) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
+                 if not all(isinstance(i, date) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
+                 if not all(isinstance(i, bytes) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             else:  # numeric type
+                 if not all(isinstance(i, numbers.Integral) for i in sort_col_values):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+         elif isinstance(sort_col_values, (tuple, dict)):
+             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                                       MessageCodes.TDMLDF_DROP_ARGS)
+         else:
+             if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
+                 if not isinstance(sort_col_values, str):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
+                 if not isinstance(sort_col_values, float):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
+                 if not isinstance(sort_col_values, Decimal):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
+                 if not isinstance(sort_col_values, datetime):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
+                 if not isinstance(sort_col_values, time):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
+                 if not isinstance(sort_col_values, date):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
+                 if not isinstance(sort_col_values, bytes):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+             else:  # numeric type
+                 if not isinstance(sort_col_values, numbers.Integral):
+                     DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+
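The branching above maps each type name to one Python type check. For reference, a hypothetical table-driven version of the same validation; the dictionary keys here are stand-ins for the PythonTypes constant values, not the library's actual strings:

```python
import numbers
from datetime import datetime, date, time
from decimal import Decimal

# Hypothetical mapping from sort-column type names to Python type checks,
# mirroring the elif chain above (keys stand in for PythonTypes values).
_TYPE_CHECKS = {
    "str": str,
    "float": float,
    "decimal.Decimal": Decimal,
    "datetime.datetime": datetime,
    "datetime.time": time,
    "datetime.date": date,
    "bytes": bytes,
}

def values_match_type(sort_col_type, values):
    expected = _TYPE_CHECKS.get(sort_col_type, numbers.Integral)  # numeric fallback
    values = values if isinstance(values, list) else [values]
    return all(isinstance(v, expected) for v in values)
```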
+     @staticmethod
+     def _get_required_columns_types_from_metaexpr(metaexpr, col_list = None):
+         """
+         Retrieves column names and types from a meta expression. To get types for
+         only some columns, pass those columns to the 'col_list' argument.
+
+         PARAMETERS:
+             metaexpr - Meta expression from which columns and types are to be retrieved.
+             col_list - Column list for which types are to be retrieved.
+
+         RETURNS:
+             Dictionary: key as column name and datatype as value.
+
+         EXAMPLES:
+             df = DataFrame.from_table('mytab')
+             metadata = DataFrameUtils._get_required_columns_types_from_metaexpr(df._metaexpr)
+         """
+         if isinstance(col_list, str):
+             col_list = [col_list]
+
+         if col_list is not None and not isinstance(col_list, list):
+             return None
+
+         meta_cols = metaexpr.t.c
+         meta_columns = [c.name for c in meta_cols]
+         col_names = []
+         col_types = []
+
+         # When column list to retrieve is not provided, return meta-data for all columns.
+         if col_list is None:
+             for col_name in meta_columns:
+                 col_names.append(meta_cols[col_name].name)
+                 col_types.append(meta_cols[col_name].type)
+
+         # Return meta-data for only requested columns otherwise.
+         else:
+             for col_name in col_list:
+                 if DataFrameUtils._check_column_exists(col_name, meta_columns):
+                     # _metaexpr saves columns without quotes, so unquoting.
+                     unquoted_col_name = col_name.replace('"', "")
+                     col_names.append(meta_cols[unquoted_col_name].name)
+                     col_types.append(meta_cols[unquoted_col_name].type)
+
+         return OrderedDict(zip(col_names, col_types))
+
+     @staticmethod
+     def _check_column_exists(column_name, df_columns):
+         """
+         Checks whether the provided column is present in the list of columns.
+         Note:
+             It is the calling function's responsibility to send the column and column list
+             in the proper case. By default the lookup is case-sensitive. For a
+             case-insensitive lookup, pass both column_name and df_columns in lower case.
+
+         PARAMETERS:
+             column_name - Column name to check for.
+             df_columns - List of columns to check in.
+
+         RETURNS:
+             True if the column exists, otherwise False.
+
+         EXAMPLES:
+             df = DataFrame.from_table('mytab')
+             column_exists = DataFrameUtils._check_column_exists("col1", df.columns)
+         """
+         unquoted_df_columns = [column.replace('"', "") for column in df_columns]
+         if column_name.replace('"', "") in unquoted_df_columns:
+             return True
+         else:
+             return False
+
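A short usage sketch (toy inputs, assuming teradataml is installed): the check strips double quotes from both sides, so quoted and unquoted spellings of the same identifier match.

```python
from teradataml.dataframe.dataframe_utils import DataFrameUtils

DataFrameUtils._check_column_exists('"col1"', ['col1', 'col2'])   # True
DataFrameUtils._check_column_exists('col3', ['col1', 'col2'])     # False
```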
+     @staticmethod
+     def _validate_agg_function(func, col_names):
+         """
+         Internal function to validate column names against actual
+         column names passed as parameter, and aggregate operations
+         against valid aggregate operations.
+
+         PARAMETERS:
+             func - (Required) Specifies the function(s) to be
+                 applied on teradataml DataFrame columns.
+                 Acceptable formats for function(s) are string,
+                 dictionary or list of strings/functions.
+                 Accepted combinations are:
+                     1. String function name
+                     2. List of string functions
+                     3. Dictionary of column names -> string function
+                        (or list of string functions)
+             col_names - List. Names of the columns in the DataFrame.
+
+         RETURNS:
+             operations - dict of columns -> aggregate operations.
+                 Unified dictionary, similar to func, even for string and
+                 list of strings or functions.
+
+         RAISES:
+             1. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
+                operation(s) received in parameter 'func' is/are
+                invalid.
+
+                Possible Value:
+                Invalid aggregate operation(s): minimum, counter.
+                Valid aggregate operation(s): count, max, mean, min,
+                std, sum.
+
+             2. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
+                specified in 'func' is not present in the dataframe.
+
+                Possible Value:
+                Invalid column(s) given in parameter func: col1.
+                Valid column(s): A, B, C, D.
+
+         EXAMPLES:
+             Let the dataframe contain 2 columns, col1 and col2.
+
+             VALID EXAMPLES:
+                 1. operations = DataFrameUtils._validate_agg_function(
+                        'mean', ['col1', 'col2'])
+
+                 2. operations = DataFrameUtils._validate_agg_function(
+                        ['mean', 'min'], ['col1', 'col2'])
+
+                 3. operations = DataFrameUtils._validate_agg_function(
+                        {'col1': ['mean', 'min'], 'col2': 'count'},
+                        ['col1', 'col2'])
+
+             INVALID EXAMPLES:
+                 1. operations = DataFrameUtils._validate_agg_function(
+                        'counter', ['col1', 'col2'])
+
+                 2. operations = DataFrameUtils._validate_agg_function(
+                        {'col1': ['mean', 'min'], 'col55': 'count'},
+                        ['col1', 'col2'])
+         """
+         operations = OrderedDict()
+
+         valid_aggregate_operations = UtilFuncs._get_valid_aggregate_operations()
+
+         if isinstance(func, str):
+             for column in col_names:
+                 operations[column] = [func]
+         elif isinstance(func, list):
+             for column in col_names:
+                 operations[column] = func
+         else:
+             for column in func:
+                 if isinstance(func[column], str):
+                     func[column] = [func[column]]  # Converts string inside dict to list
+             operations = func
+
+         given_columns = operations.keys()
+         invalid_columns = []
+         all_operations = []
+         for col in given_columns:
+             all_operations.extend(operations[col])
+             if col not in col_names:
+                 invalid_columns.append(col)
+         if len(invalid_columns) > 0:  # If any of the columns specified is not present in dataframe
+             col_names.sort()
+             invalid_columns.sort()
+             msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN). \
+                 format(", ".join(invalid_columns), 'func', ", ".join(col_names))
+             raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN)
+
+         all_operations = list(set(all_operations))
+         invalid_aggregates = []
+         for operation in all_operations:
+             if operation not in valid_aggregate_operations and not operation.startswith('percentile_') \
+                     and operation not in UtilFuncs._get_valid_time_series_aggregate_operations():
+                 invalid_aggregates.append(operation)
+         if len(invalid_aggregates) > 0:  # If any of the aggregate operations specified is not valid
+             # To raise the error message, add other time series aggregate operations that can be
+             # used with the DataFrame.agg() method.
+             valid_aggregate_operations = valid_aggregate_operations + ['first', 'last', 'mode']
+             valid_aggregate_operations.sort()
+             invalid_aggregates.sort()
+             msg = Messages.get_message(MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION). \
+                 format(", ".join(invalid_aggregates), ", ".join(valid_aggregate_operations))
+             raise TeradataMlException(msg, MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION)
+
+         return operations
+
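A hedged sketch of the normalization this helper performs, assuming the named operations are in the valid set; results are shown as plain dicts for readability (the helper actually returns an OrderedDict):

```python
from teradataml.dataframe.dataframe_utils import DataFrameUtils

# All three accepted 'func' shapes normalize to the same dict-of-lists form.
DataFrameUtils._validate_agg_function('min', ['col1', 'col2'])
# -> {'col1': ['min'], 'col2': ['min']}
DataFrameUtils._validate_agg_function(['min', 'sum'], ['col1'])
# -> {'col1': ['min', 'sum']}
DataFrameUtils._validate_agg_function({'col1': 'mean'}, ['col1', 'col2'])
# -> {'col1': ['mean']}
```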
+     @staticmethod
+     def _generate_aggregate_column_expression(df, column, operation, describe_op, tdp, **kwargs):
+         """
+         Function to generate the aggregate column expression for the provided column
+         and aggregate function.
+
+         PARAMETERS:
+             df:
+                 Required Argument.
+                 Specifies the teradataml DataFrame which is to be used to get the
+                 desired aggregate column expression.
+                 Types: teradataml DataFrame
+
+             column:
+                 Required Argument.
+                 Specifies the column name for which the desired aggregate operation is
+                 to be used.
+                 Types: str
+
+             operation:
+                 Required Argument.
+                 Specifies the aggregate operation.
+                 Types: str
+
+             describe_op:
+                 Required Argument.
+                 Specifies a boolean flag that decides whether the aggregate
+                 operation is being performed for DataFrame.describe() or not.
+                 Types: bool
+
+             tdp:
+                 Required Argument.
+                 Specifies a TeradataIdentifierPreparer object. It is required for
+                 quoting.
+                 Types: TeradataIdentifierPreparer
+
+             kwargs:
+                 Specifies miscellaneous keyword arguments that can be passed to
+                 aggregate functions.
+
+         RAISES:
+             AttributeError - In case ColumnExpression does not have the desired aggregate
+             function implemented.
+
+         RETURNS:
+             A boolean stating whether the column is supported or not, new column name,
+             new column type, a string representing the column aggregate expression, and
+             invalid column information in case the column has an unsupported type for an
+             aggregate operation.
+
+         EXAMPLES:
+             column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
+                 DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
+                                                                      describe_op=describe_op, percentile=percentile,
+                                                                      tdp=tdp, **kwargs)
+         """
+         try:
+             key_to_process = ""
+             # Quote column names that match Teradata reserved keywords.
+             if "sort_columns" in kwargs:
+                 key_to_process = "sort_columns"
+             elif "sort_column" in kwargs:
+                 key_to_process = "sort_column"
+
+             if key_to_process:
+                 quoted_columns = UtilFuncs._process_for_teradata_keyword(kwargs[key_to_process])
+                 kwargs[key_to_process] = quoted_columns
+
+             if operation.startswith('percentile_'):
+                 try:
+                     _operation_value = operation.split('_')
+                     _floatvalue = float(_operation_value[1])
+                     if _floatvalue < 0.0 or _floatvalue > 1.0 or len(_operation_value) > 2:
+                         raise ValueError
+                 except ValueError:
+                     mssg = "Invalid aggregate operation '{}' requested on TeradataML DataFrame." \
+                            " Valid operation should be in format 'percentile_<floatvalue>' and <floatvalue> " \
+                            "should be in range [0.0, 1.0].".format(operation)
+                     raise ValueError(mssg) from None
+                 func_expression = getattr(df[column], 'percentile')(percentile=_floatvalue)
+             else:
+                 func_expression = getattr(df[column], operation)(describe_op=describe_op, **kwargs)
+             new_column_name = column if describe_op else "{1}_{0}".format(column, operation)
+             # column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str
+             return True, new_column_name, NUMBER() if describe_op else func_expression.type, \
+                 func_expression.compile_label(new_column_name), None
+         except AttributeError:
+             # We are here means the provided operation is invalid and is not supported.
+             # This is for internal purposes only.
+             # Validation of operations for "agg" should be done in "agg" only.
+             raise RuntimeError("Invalid aggregate function: {}".format(operation))
+         except RuntimeError:
+             # We are here means the column does not support the provided operation.
+             # We will ignore this and add the column to the invalid column list:
+             # invalid_columns[operation].append("({0} - {1})".format(column, column_type)) OR
+             # we will raise a generic message, mentioning that the DF does not have any column
+             # with a type supported to perform the operation.
+             if describe_op:
+                 return True, tdp.quote(column), NUMBER(), 'null as {}'.format(tdp.quote(column)), None
+             else:
+                 return False, None, None, None, "({0} - {1})".format(column, df[column].type)
+         except Exception:
+             raise
+
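A standalone sketch of the `percentile_<floatvalue>` parsing used above, isolated from the DataFrame machinery:

```python
# Parses 'percentile_<floatvalue>' and enforces the [0.0, 1.0] range.
def parse_percentile(operation):
    parts = operation.split('_')
    value = float(parts[1])           # raises ValueError for non-numeric input
    if value < 0.0 or value > 1.0 or len(parts) > 2:
        raise ValueError("percentile must be a single float in [0.0, 1.0]")
    return value

parse_percentile("percentile_0.25")   # 0.25
parse_percentile("percentile_0.5")    # 0.5
```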
+     @staticmethod
+     def _construct_sql_expression_for_aggregations(df, column_names, column_types, func, percentile=.5,
+                                                    describe_op=False, **kwargs):
+         """
+         Internal function to create and return the sql expression
+         corresponding to the given operation, column_names and
+         column_types.
+
+         Column_types are used to check whether all the datatypes are
+         valid types for the given operation, and an exception is thrown
+         if they are not.
+
+         PARAMETERS:
+             df:
+                 Required Argument.
+                 Specifies the teradataml DataFrame which is to be used to get the desired
+                 aggregate column expression.
+                 Types: teradataml DataFrame
+
+             column_names:
+                 Required Argument.
+                 Specifies the column names for which the desired aggregate operation is
+                 to be executed.
+                 Types: List of strings
+
+             column_types:
+                 Required Argument.
+                 Specifies the respective column types for the column names.
+                 Types: List of teradatasqlalchemy types
+
+             func:
+                 Required Argument.
+                 Specifies the aggregate function(s) to be applied on teradataml
+                 DataFrame columns.
+                 Types: string, dictionary or list of strings/functions.
+                 Accepted combinations are:
+                     1. String function name
+                     2. List of functions
+                     3. Dictionary containing column name as key and aggregate
+                        function name (string or list of strings) as value
+                     4. ColumnExpression built using the aggregate functions.
+                     5. List of ColumnExpressions built using the aggregate functions.
+
+             percentile:
+                 Optional Argument.
+                 Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
+                 The default is .5, which returns the 50th percentile.
+                 Types: float
+
+             describe_op:
+                 Optional Argument.
+                 Specifies a boolean flag that decides whether the aggregate operation being
+                 performed is for DataFrame.describe() or not.
+                 Types: bool
+
+             kwargs:
+                 Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
+
+         RETURNS:
+             a) sql expression, such as
+                1. 'min(col1) as min_col1, min(col2) as min_col2' if
+                   col1 and col2 are the columns in the DataFrame and
+                   the operation is 'min'
+                2. 'max(col1) as max_col1, max(col2) as max_col2' if
+                   col1 and col2 are the columns in the DataFrame and
+                   the operation is 'max'
+                3. 'min(col1) as min_col1, stddev_samp(col2) as
+                   std_col2' if col1, col2 are the columns in the
+                   DataFrame and the operations are min, std.
+                etc...
+             b) new columns' names (e.g. min_col1, min_col2 ...)
+             c) new columns' types
+         RAISES:
+             TeradataMLException
+             1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
+                aggregate operations do not support the specified columns.
+
+                Possible Value:
+                No results. Below is/are the error message(s):
+                All selected columns [(col1 - VARCHAR)] is/are
+                unsupported for 'sum' operation.
+
+             2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
+                operation(s) received in parameter 'func' is/are
+                invalid.
+
+                Possible Value:
+                Invalid aggregate operation(s): minimum, counter.
+                Valid aggregate operation(s): count, max, mean, min,
+                std, sum.
+
+             3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
+                specified in func is not present in the dataframe.
+
+                Possible Value:
+                Invalid column(s) given in parameter func: col1.
+                Valid column(s): A, B, C, D.
+
+         EXAMPLES:
+             col_names, col_types = \
+                 df_utils._get_column_names_and_types_from_metaexpr(
+                     self._metaexpr)
+             expr, new_col_names, new_col_types = \
+                 df_utils._construct_sql_expression_for_aggregations(
+                     df, col_names, col_types, 'min')
+
+             expr1, new_col_names1, new_col_types1 = \
+                 df_utils._construct_sql_expression_for_aggregations(
+                     df, col_names, col_types, ['min', 'sum'])
+
+             expr2, new_col_names2, new_col_types2 = \
+                 df_utils._construct_sql_expression_for_aggregations(
+                     df, col_names, col_types, {'col1': ['min', 'sum'],
+                     'col2': 'mean'})
+
+         """
+         # e.g. of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
+         #                        FLOAT(precision=0)]
+
+         # e.g. the types of each column are <class 'teradatasqlalchemy.types.VARCHAR'>,
+         # <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
+         # <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc..
+
+         # If function is of type time series aggregates, we process aggregation differently.
+         if not isinstance(func, str):
+             # If func is not an instance of string, that means the function call is
+             # from DataFrame.agg(), and is made to process multiple functions.
+             # We will process the same differently, as we need to map and serialize the
+             # column names and the aggregate functions that operate on them.
+             # If we have just one function to be executed on the complete DataFrame, we don't need
+             # this extra processing. Also, if the call is from DataFrame.agg(), the time series
+             # aggregate check is not required, as special Time Series aggregate functions cannot be
+             # used in DataFrame.agg().
+             return DataFrameUtils._construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types,
+                                                                                      func, percentile, describe_op,
+                                                                                      **kwargs)
+
+         as_time_series_aggregate = False
+         if "as_time_series_aggregate" in kwargs.keys():
+             as_time_series_aggregate = kwargs["as_time_series_aggregate"]
+
+         if as_time_series_aggregate and func in ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top',
+                                                  'top with ties']:
+             return DataFrameUtils._construct_sql_expression_for_time_series_aggregations(df, column_names,
+                                                                                          column_types, func,
+                                                                                          **kwargs)
+
+         tdp = preparer(td_dialect)
+
+         # This variable is used to decide whether the DataFrame has all columns unsupported
+         # for the provided operations.
+         all_unsupported_columns = True
+         valid_columns = []
+         invalid_columns = []
+         new_column_names = []
+         new_column_types = []
+         for column in column_names:
+             column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
+                 DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
+                                                                      describe_op=describe_op, percentile=percentile,
+                                                                      tdp=tdp, **kwargs)
+             if column_supported:
+                 all_unsupported_columns = False
+                 new_column_names.append(new_column_name)
+                 new_column_types.append(new_column_type)
+                 valid_columns.append(column_aggr_expr)
+             else:
+                 invalid_columns.append("({0} - {1})".format(column, df[column].type))
+
+         if all_unsupported_columns:
+
+             error_msgs = []
+             invalid_columns.sort()  # Helps in catching the columns in lexicographic order
+             error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(", ".join(invalid_columns),
+                                                                            func)
+             error_msgs.append(error)
+
+             if len(valid_columns) == 0:  # No supported columns in the given list of columns
+                 raise TeradataMlException(Messages.get_message(
+                     MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
+                     MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+
+         # Quote column names that match Teradata reserved keywords.
+         quote_column_name = [UtilFuncs._process_for_teradata_keyword(col) for col in column_names]
+
+         # Actual columns should be retained if "drop_columns" is set to False.
+         if kwargs.get("drop_columns") is False:
+             valid_columns = quote_column_name + valid_columns
+             new_column_names = column_names + new_column_names
+             new_column_types = column_types + new_column_types
+
+         aggregate_expr = ", ".join(valid_columns)
+         return aggregate_expr, new_column_names, new_column_types
+
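For reference, a hedged sketch of the three return values for a simple case; the exact SQL text, quoting, and types come from the connected Vantage system, and `df` here is a hypothetical teradataml DataFrame:

```python
from teradatasqlalchemy.types import INTEGER

# Assuming df has integer columns col1 and col2, the return shape is roughly:
# aggregate_expr   -> 'min(col1) AS "min_col1", min(col2) AS "min_col2"'
# new_column_names -> ['min_col1', 'min_col2']
# new_column_types -> [INTEGER(), INTEGER()]
expr, names, types = DataFrameUtils._construct_sql_expression_for_aggregations(
    df, ['col1', 'col2'], [INTEGER(), INTEGER()], 'min')
```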
+    @staticmethod
+    def _construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types, func, percentile=.5,
+                                                           describe_op=False, **kwargs):
+        """
+        Internal function to create and return the SQL expression
+        corresponding to the given operation, column_names and
+        column_types.
+
+        column_types are used to check whether all the datatypes are
+        valid types for the given operation; an exception is thrown if
+        they are not.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame which is to be used to get the desired
+                aggregate column expression.
+                Types: teradataml DataFrame
+
+            column_names:
+                Required Argument.
+                Specifies the column names for which the desired aggregate operation is
+                to be executed.
+                Types: List of strings
+
+            column_types:
+                Required Argument.
+                Specifies the respective column types for column names.
+                Types: List of teradatasqlalchemy types
+
+            func:
+                Required Argument.
+                Specifies the aggregate function(s) to be applied on teradataml
+                DataFrame columns.
+                Types: string, dictionary or list of strings/functions.
+                Accepted combinations are:
+                    1. String function name
+                    2. List of functions
+                    3. Dictionary containing column name as key and aggregate
+                       function name (string or list of strings) as value
+                    4. ColumnExpression built using the aggregate functions.
+                    5. List of ColumnExpressions built using the aggregate functions.
+
+            percentile:
+                Optional Argument.
+                Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
+                The default is .5, which returns the 50th percentile.
+                Types: float
+
+            describe_op:
+                Optional Argument.
+                Specifies a boolean flag that decides whether the aggregate operation being
+                performed is for DataFrame.describe() or not.
+                Types: bool
+
+            kwargs:
+                Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
+
+        RETURNS:
+            a) SQL expression such as
+               1. 'min(col1) as min_col1, min(col2) as min_col2' if
+                  col1 and col2 are the columns in the DataFrame and the
+                  operation is 'min'
+               2. 'max(col1) as max_col1, max(col2) as max_col2' if
+                  col1 and col2 are the columns in the DataFrame and the
+                  operation is 'max'
+               3. 'min(col1) as min_col1, stddev_samp(col2) as
+                  std_col2' if col1, col2 are the columns in the
+                  DataFrame and the operations are min, std.
+               etc...
+            b) new columns' names (eg min_col1, min_col2 ...)
+            c) new columns' types
+
+        RAISES:
+            TeradataMLException
+            1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
+               aggregate operations do not support the specified columns.
+
+               Possible Value:
+               No results. Below is/are the error message(s):
+               All selected columns [(col1 - VARCHAR)] is/are
+               unsupported for 'sum' operation.
+
+            2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
+               operation(s) received in parameter 'func' is/are
+               invalid.
+
+               Possible Value:
+               Invalid aggregate operation(s): minimum, counter.
+               Valid aggregate operation(s): count, max, mean, min,
+               std, sum.
+
+            3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
+               specified in func is not present in the dataframe.
+
+               Possible Value:
+               Invalid column(s) given in parameter func: col1.
+               Valid column(s): A, B, C, D.
+
+        EXAMPLES:
+            col_names, col_types = \
+                df_utils._get_column_names_and_types_from_metaexpr(self._metaexpr)
+            expr, new_col_names, new_col_types = \
+                df_utils._construct_sql_expression_for_aggregations_for_agg(
+                    df, col_names, col_types, 'min')
+
+            expr1, new_col_names1, new_col_types1 = \
+                df_utils._construct_sql_expression_for_aggregations_for_agg(
+                    df, col_names, col_types, ['min', 'sum'])
+
+            expr2, new_col_names2, new_col_types2 = \
+                df_utils._construct_sql_expression_for_aggregations_for_agg(
+                    df, col_names, col_types, {'col1' : ['min', 'sum'],
+                                               'col2' : 'mean'})
+        """
+        # If the function is a time series aggregate, aggregation is processed differently.
+        # Also, one is not supposed to pass the time series aggregates below to DataFrame.agg():
+        #     ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top', 'top with ties']
+        # Thus, no extra processing is required for time series aggregates over here.
+
+        if isinstance(func, ColumnExpression) or \
+                (isinstance(func, list) and isinstance(func[0], ColumnExpression)):
+            column_agg_expr = []
+            new_column_names = []
+            new_column_types = []
+            if isinstance(func, ColumnExpression):
+                func = UtilFuncs._as_list(func)
+
+            # Validate that func is a list of ColumnExpressions.
+            for expr in func:
+                if not isinstance(expr, ColumnExpression):
+                    raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
+                        'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
+                        MessageCodes.UNSUPPORTED_DATATYPE)
+
+            for expr in func:
+                alias = expr.alias_name
+                column_agg_expr.append(expr.compile_label(alias))
+                new_column_names.append(alias)
+                new_column_types.append(expr.type)
+            aggregate_expr = ", ".join(column_agg_expr)
+            return aggregate_expr, new_column_names, new_column_types
+
+        # 'operations' contains a dict of column -> list of aggregate operations.
+        operations = DataFrameUtils._validate_agg_function(func, column_names)
+
+        all_valid_columns = []
+        all_invalid_columns = {}
+        all_new_column_names = []
+        all_new_column_types = []
+
+        # For each column, the value is True if there is at least one valid operation
+        # (operation on a valid datatype).
+        column_supported = {}
+        tdp = preparer(td_dialect)
+        for column in operations:
+            column_supported[column] = False
+            valid_columns = []
+            invalid_columns = {}
+            new_column_names = []
+            new_column_types = []
+            for operation in operations[column]:
+                is_colop_supported, new_col, new_coltype, column_aggr_expr, invalid_column_info = \
+                    DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=operation,
+                                                                         describe_op=describe_op,
+                                                                         percentile=percentile,
+                                                                         tdp=tdp, **kwargs)
+                if is_colop_supported:
+                    column_supported[column] = is_colop_supported
+                    new_column_names.append(new_col)
+                    new_column_types.append(new_coltype)
+                    valid_columns.append(column_aggr_expr)
+                else:
+                    if operation in invalid_columns:
+                        invalid_columns[operation].append(invalid_column_info)
+                    else:
+                        invalid_columns[operation] = [invalid_column_info]
+
+            all_valid_columns.extend(valid_columns)
+            all_new_column_names.extend(new_column_names)
+            all_new_column_types.extend(new_column_types)
+
+            for operation in invalid_columns:
+                if operation in all_invalid_columns:
+                    all_invalid_columns[operation].extend(invalid_columns[operation])
+                else:
+                    all_invalid_columns[operation] = invalid_columns[operation]
+
+        unsupported_columns = [col for col in column_supported if not column_supported[col]]
+        unsupported_columns.sort()  # Sort so columns are reported in lexicographic order.
+
+        error_msgs = []
+        for operation in sorted(all_invalid_columns):
+            # Sort so columns are reported in lexicographic order.
+            all_invalid_columns[operation].sort()
+            error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(
+                ", ".join(all_invalid_columns[operation]), operation)
+            error_msgs.append(error)
+
+        if not all(column_supported[col] for col in column_supported):
+            new_msg = MessageCodes.TDMLDF_AGGREGATE_AGG_DICT_ERR.value.format(", ".join(unsupported_columns))
+            error_msgs.append(new_msg)
+            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+
+        elif len(all_valid_columns) == 0:  # No supported columns in the given list of columns.
+            raise TeradataMlException(Messages.get_message(
+                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
+                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+
+        aggregate_expr = ", ".join(all_valid_columns)
+        return aggregate_expr, all_new_column_names, all_new_column_types
+
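The `_validate_agg_function` call above normalizes the several accepted shapes of `func` (string, list, dict) into a single column -> operations mapping. A rough standalone sketch of that normalization (simplified: the real helper also validates operation names and column names):

    # Rough sketch of normalizing DataFrame.agg()-style 'func' arguments
    # into a {column: [operations]} dict. No validation here.
    def normalize_func(func, column_names):
        if isinstance(func, str):
            return {col: [func] for col in column_names}
        if isinstance(func, list):
            return {col: list(func) for col in column_names}
        if isinstance(func, dict):
            return {col: [ops] if isinstance(ops, str) else list(ops)
                    for col, ops in func.items()}
        raise TypeError("func must be a str, list or dict")

    # normalize_func({'col1': ['min', 'sum'], 'col2': 'mean'}, ['col1', 'col2'])
    # -> {'col1': ['min', 'sum'], 'col2': ['mean']}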
+    @staticmethod
+    def _construct_sql_expression_for_time_series_aggregations(df, column_names, column_types, func, **kwargs):
+        """
+        Internal function to create and return the SQL expression
+        corresponding to the given time series function, column_names and
+        column_types.
+
+        column_types are used to check whether all the datatypes are
+        valid types for the given operation; an exception is thrown if
+        they are not.
+
+        NOTE:
+            This function should be used only for time series aggregates.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame which is to be used to get the desired
+                aggregate column expression.
+                Types: teradataml DataFrame
+
+            column_names:
+                Required Argument.
+                Specifies the column names for which the desired aggregate operation is
+                to be executed.
+                Types: List of strings
+
+            column_types:
+                Required Argument.
+                Specifies the respective column types for column names.
+                Types: List of teradatasqlalchemy types
+
+            func:
+                Required Argument.
+                Specifies the aggregate function(s) to be applied on teradataml
+                DataFrame columns. For time series aggregates it is usually a string.
+                Types: str
+
+            kwargs:
+                Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
+
+        RETURNS:
+            a) SQL expression such as
+               1. 'bottom(2, "col1") as "bottom2col1"' if
+                  col1 is a column in the DataFrame and the
+                  operation is 'bottom'
+               etc...
+            b) new columns' names (eg bottom2col1 ...)
+            c) new columns' types
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            colname_to_numvalues = {"col1": 2, "col2": 3}
+            kwargs = {"colname_to_numvalues": colname_to_numvalues}
+            aggregate_expr, column_names, column_types = \
+                df_utils._construct_sql_expression_for_time_series_aggregations(df, column_names,
+                                                                                column_types, func, **kwargs)
+        """
+        # e.g. of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
+        #                        FLOAT(precision=0)]
+
+        # e.g. of the types of each column: <class 'teradatasqlalchemy.types.VARCHAR'>,
+        # <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
+        # <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc.
+
+        col_names_and_types = dict(zip(column_names, column_types))
+        tdp = preparer(td_dialect)
+
+        select_columns = []
+        new_column_names = []
+        new_column_types = []
+        if func in ["bottom", "bottom with ties", "top", "top with ties"]:
+            # Processing for bottom and top.
+            # Function name to be used in column aliasing.
+            column_alias_func = func.replace(" ", "_")
+            bottom_col_val = kwargs["colname_to_numvalues"]
+            for column in sorted(list(bottom_col_val.keys())):
+                new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
+                quoted_parent_column_name = tdp.quote("{0}".format(column))
+                quoted_new_column_name = tdp.quote(new_col_name)
+                select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
+                                                                    quoted_parent_column_name,
+                                                                    quoted_new_column_name))
+                new_column_names.append(new_col_name)
+                new_column_types.append(col_names_and_types[column])
+
+        if func == "delta_t":
+            # Argument processing for DELTA_T.
+            new_column_names.append("delta_t_td_timecode")
+            quoted_new_column_name = tdp.quote(new_column_names[0])
+            new_column_types.append(PERIOD_TIMESTAMP)
+            select_columns.append("{0}((WHERE {1}), (WHERE {2})) as {3}".format(func, kwargs["start_condition"],
+                                                                                kwargs["end_condition"],
+                                                                                quoted_new_column_name))
+
+        if func == 'mad':
+            # Processing for Median Absolute Deviation.
+            # Function name to be used in column aliasing.
+            column_alias_func = func.replace(" ", "_")
+            bottom_col_val = kwargs["colname_to_numvalues"]
+            for column in sorted(list(bottom_col_val.keys())):
+                new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
+                quoted_parent_column_name = tdp.quote("{0}".format(column))
+                quoted_new_column_name = tdp.quote(new_col_name)
+                select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
+                                                                    quoted_parent_column_name,
+                                                                    quoted_new_column_name))
+                new_column_names.append(new_col_name)
+                if type(col_names_and_types[column]) in [DECIMAL, NUMBER]:
+                    # If the column type is DECIMAL or NUMBER, the output column type is the same.
+                    # Otherwise, it is FLOAT.
+                    new_column_types.append(col_names_and_types[column])
+                else:
+                    new_column_types.append(FLOAT())
+
+        if "default_constant_for_columns" in kwargs:
+            column_names = kwargs["default_constant_for_columns"]
+            column_types = [col_names_and_types[column] for column in column_names]
+            if len(column_names) > 0:
+                aggregate_expr, all_new_column_names, all_new_column_types = \
+                    DataFrameUtils._construct_sql_expression_for_aggregations(df=df, column_names=column_names,
+                                                                              column_types=column_types,
+                                                                              func=func)
+                aggregate_expr_default_column_list = [col.strip() for col in aggregate_expr.split(",")]
+                select_columns = select_columns + aggregate_expr_default_column_list
+                new_column_names = new_column_names + all_new_column_names
+                new_column_types = new_column_types + all_new_column_types
+
+        aggregate_expr = ", ".join(select_columns)
+        return aggregate_expr, new_column_names, new_column_types
+
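To make the aliasing scheme above concrete, here is a minimal sketch of the `bottom`/`top` branch as standalone code (quoting is simplified to plain double quotes; the real code uses the SQLAlchemy preparer):

    # Minimal sketch of the bottom/top expression assembly above.
    def build_top_bottom_expr(func, colname_to_numvalues):
        alias_func = func.replace(" ", "_")
        parts, names = [], []
        for column in sorted(colname_to_numvalues):
            n = colname_to_numvalues[column]
            alias = "{0}{1}{2}".format(alias_func, n, column)
            parts.append('{0}({1}, "{2}") as "{3}"'.format(func, n, column, alias))
            names.append(alias)
        return ", ".join(parts), names

    # build_top_bottom_expr("bottom", {"col1": 2})
    # -> ('bottom(2, "col1") as "bottom2col1"', ['bottom2col1'])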
+    @staticmethod
+    def _validate_describe_columns(columns, metaexpr, groupby_column_list):
+        """
+        Internal function to validate whether the columns provided to describe() are correct or not,
+        when the DataFrame is the output of groupby or groupby_time.
+
+        PARAMETERS:
+            columns:
+                Optional Argument.
+                Specifies the name(s) of the columns statistics are collected for.
+                Types: str or List of strings (str)
+
+            metaexpr:
+                Required Argument.
+                Specifies the meta expression for the dataframe.
+                Types: _MetaExpression
+
+            groupby_column_list:
+                Optional Argument.
+                Specifies the group by columns for the dataframe.
+                Default Values: None.
+                Types: str or List of strings (str)
+
+        RETURNS:
+            None
+
+        RAISES:
+            TeradataMLException
+        """
+        invalid_columns = [_column for _column in groupby_column_list if _column in columns]
+        if len(invalid_columns) > 0:
+            all_columns = [col.name for col in metaexpr.c]
+            valid_columns = [item for item in all_columns if item not in groupby_column_list]
+            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN). \
+                format(", ".join(invalid_columns), 'columns', ", ".join(valid_columns))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN)
+
+    @staticmethod
+    def _construct_describe_query(df, columns, metaexpr, percentiles, function_label, groupby_column_list=None,
+                                  include=None, is_time_series_aggregate=False, verbose=False, distinct=False,
+                                  statistics=None, **kwargs):
+        """
+        Internal function to create the SQL query for describe().
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame statistics are collected for.
+                Types: teradataml DataFrame
+
+            columns:
+                Optional Argument.
+                Specifies the name(s) of the columns statistics are collected for.
+                Types: str or List of strings (str)
+
+            metaexpr:
+                Required Argument.
+                Specifies the meta expression for the dataframe.
+                Types: _MetaExpression
+
+            percentiles:
+                Required Argument.
+                Specifies a list of values between 0 and 1.
+                Types: List of floats
+
+            function_label:
+                Required Argument.
+                Specifies a string value used as the label for the aggregate function column.
+                Types: str
+
+            groupby_column_list:
+                Optional Argument.
+                Specifies the group by columns for the dataframe.
+                Default Values: None.
+                Types: str or List of strings (str)
+
+            include:
+                Optional Argument.
+                Specifies a string that must be "all" or None. If "all", then all columns are included.
+                Otherwise, only numeric columns are used for collecting statistics.
+                Default Values: None.
+                Types: str
+
+            is_time_series_aggregate:
+                Optional Argument.
+                Specifies a flag stating whether the describe operation is a time series aggregate or not.
+                Default Values: False.
+                Types: bool
+
+            verbose:
+                Optional Argument.
+                Specifies a flag stating whether the DESCRIBE VERBOSE option for time series
+                aggregates is to be performed or not.
+                Default Values: False.
+                Types: bool
+
+            distinct:
+                Optional Argument.
+                Specifies a flag that decides whether to consider duplicate rows in the calculation or not.
+                Default Values: False
+                Types: bool
+
+            statistics:
+                Optional Argument.
+                Specifies the aggregate operations to run instead of the default set.
+                Used only when "include" is None.
+                Default Values: None.
+                Types: List of strings (str)
+
+            kwargs:
+                Optional Arguments.
+                Keyword arguments for time series aggregate functions.
+
+        RETURNS:
+            A SQL query like:
+                select 'count' as "func", cast(count("Feb") as Number) as "Feb", cast(count(accounts) as Number) as accounts from "PYUSER"."salesview"
+                union all
+                select 'mean' as "func", cast(avg("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
+                union all
+                select 'std' as "func", cast(stddev_samp("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
+                union all
+                select 'min' as "func", cast(min("Feb") as Number) as "Feb", cast(min(accounts) as Number) as accounts from "PYUSER"."salesview"
+                union all
+                select '25%' as "func", percentile_cont(0.25) within group(order by cast("Feb" as Number)) as "Feb", null as accounts from "PYUSER"."salesview"
+                union all
+                select '50%' as "func", percentile_cont(0.5) within group(order by cast("Feb" as Number)) as "Feb", null as accounts from "PYUSER"."salesview"
+                union all
+                select '75%' as "func", percentile_cont(0.75) within group(order by cast("Feb" as Number)) as "Feb", null as accounts from "PYUSER"."salesview"
+                union all
+                select 'max' as "func", cast(max("Feb") as Number) as "Feb", cast(max(accounts) as Number) as accounts from "PYUSER"."salesview"
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            agg_query = \
+                df_utils._construct_describe_query(df, None, self._metaexpr, [.25, .5, .75], "func",
+                                                   self.groupby_column_list)
+            agg_query = \
+                df_utils._construct_describe_query(df, None, self._metaexpr, [.3, .6], "func",
+                                                   self.groupby_column_list, include="all")
+        """
+        table_name = df._table_name
+        operators = ["count", "mean", "std", "min", "percentile", "max"]
+        all_operators = ["count", "unique", "mean", "std", "min", "percentile", "max"]
+
+        if is_time_series_aggregate and verbose:
+            # Time series aggregate operators for the Vantage DESCRIBE function with verbose.
+            operators = ['max', 'mean', 'median', 'min', 'mode', 'percentile', 'std']
+        elif is_time_series_aggregate and not verbose:
+            # Time series aggregate operators for the Vantage DESCRIBE function.
+            operators = ['max', 'mean', 'min', 'std']
+
+        col_names = []
+        col_types = []
+        sel_agg_stmts = []
+        tdp = preparer(td_dialect)
+        quoted_function_label = tdp.quote(function_label)
+
+        if include is not None and include == 'all' and not is_time_series_aggregate:
+            operators = all_operators
+
+        if include is None and statistics is not None:
+            operators = statistics
+
+        table_name, sel_groupby, groupby = DataFrameUtils()._process_groupby_clause(table_name,
+                                                                                    groupby_column_list,
+                                                                                    is_time_series_aggregate,
+                                                                                    **kwargs)
+
+        for col in metaexpr.c:
+            if (include is None and type(col.type) in UtilFuncs()._get_numeric_datatypes()) \
+                    or include == 'all' or statistics is not None:
+                if not (groupby is not None and col.name in groupby_column_list):
+                    if columns is None or col.name in columns:
+                        col_names.append(col.name)
+                        col_types.append(col.type)
+
+        if len(col_names) == 0:
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR,
+                                     "The DataFrame does not contain numeric columns"),
+                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
+
+        for op in operators:
+            if op == "percentile":
+                for p in percentiles:
+                    agg_expr, new_col_names, new_col_types = \
+                        DataFrameUtils._construct_sql_expression_for_aggregations(
+                            df, col_names, col_types, op, percentile=p, describe_op=True, distinct=distinct,
+                            as_time_series_aggregate=is_time_series_aggregate)
+                    sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}%' as varchar(6)) as \"{1}\", {2} from {3} ".format(
+                        int(p * 100), quoted_function_label, agg_expr, table_name, sel_groupby))
+            else:
+                agg_expr, new_col_names, new_col_types = \
+                    DataFrameUtils._construct_sql_expression_for_aggregations(
+                        df, col_names, col_types, op, describe_op=True, distinct=distinct,
+                        as_time_series_aggregate=is_time_series_aggregate)
+                sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}' as varchar(6)) as \"{1}\", \n\t{2} \nfrom \n\t{3} ".format(
+                    op, quoted_function_label, agg_expr, table_name, sel_groupby))
+        return " \nunion all\n ".join(sel_agg_stmts)
+
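A stripped-down sketch of the UNION ALL assembly above (a hypothetical helper; the real code also handles grouping, quoting, percentiles and DISTINCT):

    # Sketch of how describe() stitches one SELECT per statistic into a
    # single UNION ALL query.
    def build_describe_query(table_name, columns, operators, label="func"):
        stmts = []
        for op in operators:
            projections = ", ".join(
                "cast({0}({1}) as Number) as {1}".format(op, col) for col in columns)
            stmts.append("select '{0}' as \"{1}\", {2} from {3}".format(
                op, label, projections, table_name))
        return "\nunion all\n".join(stmts)

    # print(build_describe_query("sales", ["Feb"], ["count", "min", "max"]))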
+    @staticmethod
+    def _process_groupby_clause(table_name, groupby_column_list, is_time_series_aggregate, **kwargs):
+        """
+        Internal function used to process and generate the GROUP BY or GROUP BY TIME clause
+        required for the query to be run for the describe operation.
+
+        PARAMETERS:
+            table_name:
+                Required Argument.
+                Specifies the table name to be used for forming the describe query.
+                Types: str
+
+            groupby_column_list:
+                Required Argument.
+                Specifies the list of column names involved in the GROUP BY.
+                Types: List of strings
+
+            is_time_series_aggregate:
+                Required Argument.
+                Specifies a boolean stating whether the GROUP BY clause to be formed is for a
+                time series aggregate or not.
+                Types: bool
+
+            kwargs:
+                Optional Arguments.
+                Keyword arguments for time series aggregate functions.
+
+        RETURNS:
+            1. Table name appended with the GROUP BY clause.
+            2. Column projection string for the GROUP BY columns.
+            3. The GROUP BY clause.
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            table_name, sel_groupby, groupby = \
+                DataFrameUtils()._process_groupby_clause(table_name, groupby_column_list,
+                                                         is_time_series_aggregate, **kwargs)
+        """
+        sel_groupby = ""
+        grp_by_clause = None
+
+        if is_time_series_aggregate:
+            # For time series aggregates, timebucket_duration is mandatory, so it is always present in kwargs.
+            grp_by_clause = "GROUP BY TIME ({0})".format(kwargs['timebucket_duration']) if False else \
+                "GROUP BY TIME ({0}".format(kwargs['timebucket_duration'])
+
+            # Add columns in the value expression to GROUP BY TIME.
+            if 'value_expression' in kwargs and \
+                    kwargs['value_expression'] is not None and \
+                    len(kwargs['value_expression']) > 0:
+                grp_by_clause = "{0} and {1}".format(grp_by_clause, ", ".join(kwargs['value_expression']))
+
+            # Complete the parenthesis for GROUP BY TIME.
+            grp_by_clause = "{0})".format(grp_by_clause)
+
+            # Add timecode column information.
+            if 'timecode_column' in kwargs and \
+                    kwargs['timecode_column'] is not None and \
+                    len(kwargs['timecode_column']) > 0:
+                if 'sequence_column' in kwargs and \
+                        kwargs['sequence_column'] is not None and \
+                        len(kwargs['sequence_column']) > 0:
+                    grp_by_clause = "{0} USING TIMECODE({1}, {2})".format(grp_by_clause,
+                                                                          kwargs['timecode_column'],
+                                                                          kwargs['sequence_column'])
+                else:
+                    grp_by_clause = "{0} USING TIMECODE({1})".format(grp_by_clause, kwargs['timecode_column'])
+
+            # Add FILL information.
+            if 'fill' in kwargs and kwargs['fill'] is not None and len(kwargs['fill']) > 0:
+                grp_by_clause = "{0} FILL({1})".format(grp_by_clause, kwargs['fill'])
+
+        else:
+            if groupby_column_list is not None:
+                grp_by_clause = "GROUP BY {0}".format(",".join(groupby_column_list))
+
+        if grp_by_clause is not None:
+            table_name = "{0} \n{1}".format(table_name, grp_by_clause)
+            tdp = preparer(td_dialect)
+            for g in groupby_column_list:
+                if is_time_series_aggregate:
+                    if g == "TIMECODE_RANGE":
+                        g = "$TD_TIMECODE_RANGE"
+
+                    if "GROUP BY TIME" in g:
+                        g = "$TD_GROUP_BY_TIME"
+
+                quoted_name = tdp.quote(g)
+                sel_groupby += "{0}, ".format(quoted_name)
+
+        return table_name, sel_groupby, grp_by_clause
+
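As a concrete illustration of the clause assembly above, a minimal standalone sketch (inputs are hypothetical; the clause shape mirrors the branches in the method):

    # Minimal sketch of GROUP BY TIME clause assembly.
    def build_group_by_time(timebucket_duration, value_expression=None,
                            timecode_column=None, sequence_column=None, fill=None):
        clause = "GROUP BY TIME ({0}".format(timebucket_duration)
        if value_expression:
            clause = "{0} and {1}".format(clause, ", ".join(value_expression))
        clause += ")"
        if timecode_column:
            if sequence_column:
                clause = "{0} USING TIMECODE({1}, {2})".format(clause, timecode_column, sequence_column)
            else:
                clause = "{0} USING TIMECODE({1})".format(clause, timecode_column)
        if fill:
            clause = "{0} FILL({1})".format(clause, fill)
        return clause

    # build_group_by_time("MINUTES(10)", timecode_column="td_timecode", fill="NULLS")
    # -> 'GROUP BY TIME (MINUTES(10)) USING TIMECODE(td_timecode) FILL(NULLS)'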
+    @staticmethod
+    def _get_column_names_and_types_from_metaexpr(metaexpr):
+        """
+        Internal function to return column names and respective types,
+        given a _metaexpr.
+
+        PARAMETERS:
+            metaexpr:
+                Required Argument.
+                Dataframe's metaexpr. It is used to get column names and types.
+                Types: _MetaExpression
+
+        RETURNS:
+            Two lists - one for column names and another for column types.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
+        """
+        # Construct new column names & types for the selected columns ONLY, using the parent _metaexpr.
+        col_names = []
+        col_types = []
+        for c in metaexpr.c:
+            col_names.append(c.name)
+            col_types.append(c.type)
+
+        return col_names, col_types
+
+    @staticmethod
+    def _insert_all_from_table(to_table_name, from_table_name, column_list, to_schema_name=None,
+                               from_schema_name=None, temporary=False):
+        """
+        Inserts all records from one table into the second, using columns ordered by the column list.
+
+        PARAMETERS:
+            to_table_name - String specifying the name of the SQL table to insert to.
+            from_table_name - String specifying the name of the SQL table to insert from.
+            column_list - List of strings specifying the column names used in the insertion.
+            to_schema_name - Name of the database schema to insert table data into.
+            from_schema_name - Name of the database schema to insert table data from.
+            temporary - Specifies whether to create Vantage tables as permanent or volatile.
+                        Default: False
+                        Note: When True:
+                              1. volatile tables are created, and
+                              2. schema_name is ignored.
+                              When False, permanent tables are created.
+
+        RETURNS:
+            None
+
+        RAISES:
+            Database error if an error occurred while executing the insert command.
+
+        EXAMPLES:
+            df_utils._insert_all_from_table('table1_name', 'table2_name', ['col1', 'col2', 'col3'])
+        """
+        tdp = preparer(td_dialect)
+
+        # Construct the INSERT command.
+        column_order_string = ', '.join([tdp.quote("{0}".format(element)) for element in column_list])
+
+        # Generate the full name of the destination table.
+        if to_schema_name:
+            full_to_table_name = tdp.quote(to_schema_name) + "." + tdp.quote(to_table_name)
+        elif temporary:
+            full_to_table_name = tdp.quote(to_table_name)
+        else:
+            full_to_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(to_table_name)
+
+        # Generate the full name of the source table.
+        if from_schema_name:
+            full_from_table_name = tdp.quote(from_schema_name) + "." + tdp.quote(from_table_name)
+        else:
+            full_from_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(from_table_name)
+
+        insert_sql = SQLBundle._build_insert_from_table_query(full_to_table_name,
+                                                              full_from_table_name,
+                                                              column_order_string)
+        # Execute the INSERT command.
+        return UtilFuncs._execute_ddl_statement(insert_sql)
+
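For reference, the statement built above boils down to an INSERT ... SELECT with an explicit column order. A minimal sketch of that shape (plain string formatting and hypothetical quoting; the actual SQL text comes from SQLBundle):

    # Minimal sketch of an INSERT ... SELECT with an explicit column order.
    def build_insert_from_table(to_table, from_table, columns):
        cols = ", ".join('"{0}"'.format(c) for c in columns)
        return "INSERT INTO {0} ({1}) SELECT {2} FROM {3}".format(to_table, cols, cols, from_table)

    # build_insert_from_table('"db"."t1"', '"db"."t2"', ["col1", "col2"])
    # -> 'INSERT INTO "db"."t1" ("col1", "col2") SELECT "col1", "col2" FROM "db"."t2"'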
+    @staticmethod
+    def _dataframe_has_column(data, column):
+        """
+        Function to check whether the given column name is present in the given dataframe or not.
+        This function is currently used only for analytics wrappers.
+
+        PARAMETERS:
+            data - teradataml DataFrame to check against for column existence.
+            column - Column name (a string).
+
+        RETURNS:
+            True if the column is present in the DataFrame, False otherwise.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            DataFrameUtils._dataframe_has_column(data, col)
+        """
+        return column in [c.name for c in data._metaexpr.c]
+
+    @staticmethod
+    def _get_row_count(table_name):
+        """
+        Function to return the row count of a teradataml DataFrame.
+        This function is used currently to determine the shape/size of a dataframe.
+
+        PARAMETERS:
+            table_name - Name of the table to get the row count for.
+
+        RAISES:
+            TeradataMlException (TDMLDF_INFO_ERROR)
+
+        EXAMPLES:
+            DataFrameUtils._get_row_count(table_name)
+        """
+        # Construct the COUNT(*) query.
+        try:
+            row_count_query = SQLBundle._build_nrows_print_query(table_name)
+            res = execute_sql(row_count_query)
+            return res.fetchone()[0]
+
+        except TeradataMlException:
+            raise
+
+        except Exception as err:
+            # TODO: Better handle the level of information being presented to the user with logging.
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
+
+    @staticmethod
+    def _get_scalar_value(table_name):
+        """
+        Function to return the only 1x1 (scalar) value from a teradataml DataFrame.
+
+        PARAMETERS:
+            table_name - Name of the table to get the value from.
+
+        RAISES:
+            TeradataMlException (TDMLDF_INFO_ERROR)
+
+        EXAMPLES:
+            DataFrameUtils._get_scalar_value(table_name)
+        """
+        # Construct the base query.
+        try:
+            select_query = SQLBundle._build_base_query(table_name)
+            res = execute_sql(select_query)
+            return res.fetchone()[0]
+
+        except TeradataMlException:
+            raise
+
+        except Exception as err:
+            # TODO: Better handle the level of information being presented to the user with logging.
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
+
+    @staticmethod
+    def _get_sorted_nrow(df, n, sort_col, asc=True):
+        """
+        Internal utility function that returns a teradataml DataFrame containing n rows
+        of the DataFrame, sorted on the given sort column.
+
+        PARAMETERS:
+            df: teradataml DataFrame
+            n: Specifies the number of rows to select.
+               Type: int
+            sort_col: The column to sort on.
+                      Type: str
+            asc: (optional) - Specifies the sort order.
+                 If True, sort in ascending order.
+                 If False, sort in descending order.
+                 The default value is True.
+                 Type: boolean
+
+        RETURNS:
+            teradataml DataFrame
+
+        EXAMPLES:
+            DataFrameUtils._get_sorted_nrow(df, 10, "col1")
+            DataFrameUtils._get_sorted_nrow(df, 20, "col1", asc=True)
+            DataFrameUtils._get_sorted_nrow(df, 30, "col1", asc=False)
+        """
+        # TODO: implement and use this in teradatasqlalchemy.
+        tdp = preparer(td_dialect)
+        aed_utils = AedUtils()
+
+        sort_order = "asc"
+        if not asc:
+            sort_order = "desc"
+
+        quoted_cols = [tdp.quote(c) for c in df.columns]
+        sel_cols_str = ",".join(quoted_cols)
+        sel_row_num = "row_number() over (order by \"{0}\" {1}) - 1 as tdml_row_num, {2}".format(
+            sort_col, sort_order, sel_cols_str)
+        filter_str = "tdml_row_num < {0}".format(n)
+        sel_nodeid = aed_utils._aed_select(df._nodeid, sel_row_num)
+        fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
+        sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
+        col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
+        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types),
+                                                             datalake=df._metaexpr.datalake)
+        # Call _from_node from the appropriate class, either DataFrame or GeoDataFrame.
+        new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
+        new_df._orderby = df._orderby
+        new_df._metaexpr._n_rows = n
+        return new_df
+
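The select -> filter -> select node chain above effectively wraps the table in a ROW_NUMBER() window and filters on it. Roughly the following SQL shape (illustrative only; "my_table" and the column list are hypothetical stand-ins, and the real query is built through AED nodes):

    # Illustrative shape of the paging SQL produced by the node chain above.
    n, sort_col, cols = 10, "id", '"id", "name"'
    inner = ('select row_number() over (order by "{0}" asc) - 1 as tdml_row_num, {1} '
             'from my_table'.format(sort_col, cols))
    paged = "select {0} from ({1}) as t where tdml_row_num < {2}".format(cols, inner, n)
    print(paged)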
+    @staticmethod
+    def _get_database_names(connection, schema_name):
+        """
+        Function to return a list of valid database names for a given SQLAlchemy connection.
+        This function is used to determine whether the database used is valid in user APIs
+        such as copy_to_sql.
+
+        PARAMETERS:
+            connection:
+                Required Argument.
+                A SQLAlchemy connection object.
+
+            schema_name:
+                Required Argument.
+                String specifying the requested schema name.
+
+        RETURNS:
+            List of database names (strings) matching the given schema name.
+
+        RAISES:
+            TeradataMlException (TDMLDF_INFO_ERROR)
+
+        EXAMPLES:
+            DataFrameUtils._get_database_names(get_connection(), schema_name)
+        """
+        # TODO: implement and use this in teradatasqlalchemy.
+        table_obj = table('databasesV', column('databasename'), schema='dbc')
+        stmt = select(text(str(func.lower(table_obj.c.databasename)) + ' as databasename')).where(
+            text('databasename (NOT CASESPECIFIC) = {} (NOT CASESPECIFIC)'.format(':schema_name')))
+        stmt = text(str(stmt))
+        stmt = stmt.bindparams(schema_name=schema_name)
+        res = connection.execute(stmt).fetchall()
+        return [name.databasename for name in res]
+
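The pattern above is SQLAlchemy's textual SQL with a bound parameter. A generic, self-contained sketch of the same pattern against a throwaway SQLite engine (table and column names here are hypothetical, not Teradata's dbc views):

    # Generic sketch of textual SQL with a bound parameter in SQLAlchemy.
    from sqlalchemy import create_engine, text

    engine = create_engine("sqlite://")  # in-memory stand-in engine
    with engine.connect() as conn:
        conn.execute(text("create table dbs (databasename varchar(30))"))
        conn.execute(text("insert into dbs values ('demo_db')"))
        stmt = text("select lower(databasename) as databasename from dbs "
                    "where databasename = :schema_name").bindparams(schema_name="demo_db")
        print([row.databasename for row in conn.execute(stmt).fetchall()])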
+    @staticmethod
+    def _get_common_parent_df_from_dataframes(dfs):
+        """
+        Internal function to return the common parent dataframe from a given list of dataframes.
+        """
+        from teradataml import DataFrame, in_schema
+        aed_utils = AedUtils()
+        if len(dfs) == 1:
+            operation = aed_utils._aed_get_node_query_type(dfs[0]._nodeid)
+            if operation in ["table", "assign"]:
+                # Assign might have removed some columns and if it is only one dataframe,
+                # then return the same dataframe.
+                # Return the same dataframe if it is a DataFrame object from a table.
+                return dfs[0]
+
+            # If a select node or any other node, then get the parent node and execute it.
+            pids = aed_utils._aed_get_parent_nodeids(dfs[0]._nodeid)
+            if not aed_utils._aed_is_node_executed(pids[0]):
+                _ = DataFrameUtils._execute_node_return_db_object_name(pids[0])
+
+            tab_name_first = aed_utils._aed_get_source_tablename(pids[0])
+
+            db_schema = UtilFuncs._extract_db_name(tab_name_first)
+            db_table_name = UtilFuncs._extract_table_name(tab_name_first)
+            if dfs[0]._metaexpr.datalake:
+                return DataFrame(in_schema(db_schema, db_table_name, dfs[0]._metaexpr.datalake))
+
+            if db_schema:
+                return DataFrame(in_schema(db_schema, db_table_name))
+
+            return DataFrame(db_table_name)
+
+        pids_first = None
+        parent_df = None
+        for i in range(len(dfs)):
+            pids = aed_utils._aed_get_parent_nodeids(dfs[i]._nodeid)
+
+            if parent_df is None:
+                if not aed_utils._aed_is_node_executed(pids[0]):
+                    _ = DataFrameUtils._execute_node_return_db_object_name(pids[0])
+
+                tab_name_first = aed_utils._aed_get_source_tablename(pids[0])
+
+                db_schema = UtilFuncs._extract_db_name(tab_name_first)
+                db_table_name = UtilFuncs._extract_table_name(tab_name_first)
+
+                if dfs[i]._metaexpr.datalake:
+                    parent_df = DataFrame(in_schema(db_schema, db_table_name, dfs[i]._metaexpr.datalake))
+                elif db_schema:
+                    parent_df = DataFrame(in_schema(db_schema, db_table_name))
+                else:
+                    parent_df = DataFrame(db_table_name)
+                pids_first = pids
+            else:
+                if pids_first != pids:
+                    raise TeradataMlException(Messages.get_message(MessageCodes.DFS_NO_COMMON_PARENT),
+                                              MessageCodes.DFS_NO_COMMON_PARENT)
+
+        return parent_df
+
+    @staticmethod
+    def _get_sqlalchemy_type_from_str(td_type):
+        """
+        Function to get the teradatasqlalchemy type from the string representation of that type.
+
+        PARAMETERS:
+            td_type:
+                Required Argument.
+                Specifies the string representation of a teradatasqlalchemy type.
+                Types: str
+
+        RAISES:
+            ValueError
+
+        EXAMPLES:
+            >>> dt = DataFrameUtils._get_sqlalchemy_type_from_str("DECIMAL(4,4)")
+            >>> dt
+            DECIMAL(precision=4, scale=4)
+            >>> type(dt)
+            teradatasqlalchemy.types.DECIMAL
+
+            >>> dt = DataFrameUtils._get_sqlalchemy_type_from_str("VARCHAR(32000) CHARACTER SET UNICODE")
+            >>> dt
+            VARCHAR(length=32000, charset='UNICODE')
+            >>> type(dt)
+            teradatasqlalchemy.types.VARCHAR
+        """
+        # 4 groups in the pattern:
+        #   1. Type name
+        #   2. Comma separated parameters enclosed in parentheses
+        #   3. Comma separated parameters without parentheses
+        #   4. Remaining string
+        pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"
+
+        m = re.match(pattern, td_type)
+        # Check the match before accessing its groups to avoid an AttributeError on None.
+        if m is None or m.group(1) not in _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER.keys():
+            raise ValueError("Invalid Teradata type: {} from datalake".format(td_type))
+
+        td_str_type = m.group(1)
+        td_str_params = m.group(3)
+        td_str_remain = m.group(4)
+
+        if td_str_type in ["VARCHAR", "CHAR"]:
+            # If VARCHAR or CHAR, extract length and charset from the string.
+            length = int(td_str_params.split(",")[0])
+            charset = td_str_remain.strip().split(" ")[2]
+            return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type](length=length,
+                                                                                            charset=charset)
+
+        if td_str_type in ["BLOB"]:
+            # Ignoring the charset as BLOB does not have one.
+            # If BLOB, extract length from the string.
+            length = int(td_str_params.split(",")[0])
+            return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type](length=length)
+
+        if td_str_type in ["DECIMAL"]:
+            # If DECIMAL, extract precision and scale from the string.
+            args = td_str_params.split(",")
+            return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type](precision=int(args[0]),
+                                                                                            scale=int(args[1]))
+
+        # TODO: Test for other data types once the OTF team finalizes all data types.
+        return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]()
+
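A quick look at what the regex actually captures for the docstring examples (standalone, standard library only):

    import re

    # The same 4-group pattern as above: name, "(params)", params, remainder.
    pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"

    for td_type in ["DECIMAL(4,4)", "VARCHAR(32000) CHARACTER SET UNICODE", "INTEGER"]:
        m = re.match(pattern, td_type)
        print(m.group(1), "|", m.group(3), "|", m.group(4))
    # DECIMAL | 4,4 |
    # VARCHAR | 32000 |  CHARACTER SET UNICODE
    # INTEGER | None |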
+    @staticmethod
+    def _get_datalake_table_columns_info(schema, table_name, datalake, use_dialect=False):
+        """
+        Function to get the column names and corresponding teradatasqlalchemy types
+        of a datalake table using the results of the
+        'help table <datalake>.<db_name>.<table_name>' SQL query.
+
+        PARAMETERS:
+            schema:
+                Required Argument.
+                Specifies the name of the schema.
+                Types: str
+
+            table_name:
+                Required Argument.
+                Specifies the name of the table.
+                Types: str
+
+            datalake:
+                Required Argument.
+                Specifies the name of the datalake.
+                Types: str
+
+            use_dialect:
+                Optional Argument.
+                Specifies whether to get the column information through the SQLAlchemy
+                dialect instead of parsing the HELP TABLE output.
+                Default Values: False.
+                Types: bool
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> DataFrameUtils._get_datalake_table_columns_info(table_name='sales',
+            ...                                                 schema='otftestdb',
+            ...                                                 datalake='datalake_iceberg_glue')
+            (['id', 'masters', 'gpa', 'stats', 'programming', 'admitted'],
+             [INTEGER(),
+              VARCHAR(length=2000, charset='UNICODE'),
+              FLOAT(),
+              VARCHAR(length=2000, charset='UNICODE'),
+              VARCHAR(length=2000, charset='UNICODE'),
+              INTEGER()])
+        """
+        col_names = []
+        col_types = []
+        if not use_dialect:
+            # Get the column information from the string representations of the types.
+            prepared = preparer(td_dialect())
+            sqlbundle = SQLBundle()
+            full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
+                                              prepared.quote(schema),
+                                              prepared.quote(table_name))
+            help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
+
+            cur = execute_sql(help_table_sql)
+            td_types_col_index = -1
+
+            for i, col_metadata in enumerate(cur.description):
+                # HELP TABLE returns column names and the corresponding
+                # IcebergType, TeradataInternalType and TeradataType.
+                # We need to extract the column index of the 'TeradataType' column.
+                if col_metadata[0].lower() in ['teradatatype', 'type']:
+                    td_types_col_index = i
+
+            if td_types_col_index > -1:
+                for col_info in cur.fetchall():
+                    col_names.append(col_info[0])
+                    col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
+            else:
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
+                                          MessageCodes.TDMLDF_CREATE_FAIL)
+        else:
+            new_kwarg = get_connection().dialect.__class__.__name__ + "_datalake"
+            all_col_info = get_connection().dialect.get_columns(connection=get_connection(),
+                                                                table_name=table_name,
+                                                                schema=schema,
+                                                                table_only=True,
+                                                                **{new_kwarg: datalake})
+            for col_dict in all_col_info:
+                col_names.append(col_dict.get('name', col_dict.get('Column Name')))
+                col_types.append(col_dict.get('type', col_dict.get('Type')))
+
+        return col_names, col_types
+
+    @staticmethod
+    def check_otf_dataframe():
+        """Decorator for validating whether a DataFrame is created on an OTF table; raises an error if not."""
+        def decorator(method):
+            def wrapper(self, *args, **kwargs):
+                if not self._datalake:
+                    attr = getattr(type(self), method.__name__, None)
+                    caller_name = method.__name__ + '()'
+                    if isinstance(attr, property):
+                        caller_name = method.__name__
+                    raise TeradataMlException(Messages.get_message(MessageCodes.OTF_TABLE_REQUIRED,
+                                                                   caller_name),
+                                              MessageCodes.UNSUPPORTED_OPERATION)
+
+                return method(self, *args, **kwargs)
+
+            return wrapper
+
+        return decorator
+
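The decorator factory above follows the standard precondition-check pattern: wrap the method, raise before calling it if the instance is not in the required state. A generic, self-contained sketch of the same idea (names here are illustrative, not teradataml APIs):

    # Generic sketch of the precondition-check decorator pattern.
    import functools

    def require_flag(flag_attr):
        def decorator(method):
            @functools.wraps(method)
            def wrapper(self, *args, **kwargs):
                # Raise before running the wrapped method if the flag is not set.
                if not getattr(self, flag_attr, None):
                    raise RuntimeError("{0}() requires '{1}' to be set".format(
                        method.__name__, flag_attr))
                return method(self, *args, **kwargs)
            return wrapper
        return decorator

    class Frame:
        def __init__(self, datalake=None):
            self._datalake = datalake

        @require_flag("_datalake")
        def partitions(self):
            return ["p1", "p2"]

    # Frame("lake").partitions() -> ['p1', 'p2']; Frame().partitions() raises RuntimeError.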
+    @staticmethod
+    def _get_column_info_from_query(query):
+        """
+        DESCRIPTION:
+            Get the column names and types from a SQL query.
+            Note:
+                This function obtains the SQL statement metadata without executing the query.
+
+        PARAMETERS:
+            query:
+                Required Argument.
+                Specifies the SQL query to analyze.
+                Types: str
+
+        RETURNS:
+            dict: A dictionary mapping column names to their teradatasqlalchemy types.
+
+        EXAMPLES:
+            >>> query = "SELECT * FROM my_table"
+            >>> DataFrameUtils._get_column_info_from_query(query)
+            {'col1': VARCHAR(length=20, charset='UNICODE'), 'col2': INTEGER()}
+        """
+        # Get the column metadata by executing the query with the teradata_rpo(S) and
+        # teradata_fake_result_sets escape functions.
+        cur = execute_sql('{fn teradata_rpo(S)}{fn teradata_fake_result_sets}' + query)
+        row = cur.fetchone()
+
+        # When using {fn teradata_rpo(S)}{fn teradata_fake_result_sets}, the result row contains:
+        # >>> print([col[0] for col in cur.description])
+        # ['NativeSQL', 'RequestNumber', 'StatementNumber', 'ActivityType', 'ActivityCount',
+        #  'WarningCode', 'WarningMessage', 'ColumnMetadata', 'ParameterMetadata']
+        #
+        # Example of the row[7] JSON structure:
+        # [
+        #     {
+        #         "Title": "column_name",
+        #         "TypeName": "col_type",
+        #         "Precision": 0,
+        #         "Scale": 0,
+        #         ...
+        #     }
+        # ]
+        column_metadata_json = row[7]
+        column_metadata = json.loads(column_metadata_json)
+
+        # Extract the column name and get the teradatasqlalchemy type from the column metadata.
+        column_info = {}
+        for col_info in column_metadata:
+            col_name = col_info['Title']
+            col_type = _Dtypes._get_td_type_from_metadata(col_info)
+            column_info[col_name] = col_type
+
+        return column_info
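To show just the parsing step in isolation, a small sketch over a hard-coded metadata payload (the JSON shape mirrors the comment above; the field names and the type mapping are simplified stand-ins for the real _Dtypes logic):

    import json

    # Simplified stand-in for the metadata -> type mapping done by _Dtypes.
    TYPE_MAP = {"INTEGER": "INTEGER()", "VARCHAR": "VARCHAR(length={Precision})"}

    column_metadata_json = json.dumps([
        {"Title": "col1", "TypeName": "VARCHAR", "Precision": 20, "Scale": 0},
        {"Title": "col2", "TypeName": "INTEGER", "Precision": 0, "Scale": 0},
    ])

    column_info = {}
    for col_info in json.loads(column_metadata_json):
        column_info[col_info["Title"]] = TYPE_MAP[col_info["TypeName"]].format(**col_info)

    print(column_info)  # {'col1': 'VARCHAR(length=20)', 'col2': 'INTEGER()'}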