teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic; see the registry's advisory page for more details.

Files changed (1303):
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1935 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2040 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +798 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1683 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +1011 -0
  31. teradataml/automl/data_transformation.py +789 -0
  32. teradataml/automl/feature_engineering.py +1580 -0
  33. teradataml/automl/feature_exploration.py +554 -0
  34. teradataml/automl/model_evaluation.py +151 -0
  35. teradataml/automl/model_training.py +1026 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/auth_client.py +133 -0
  41. teradataml/clients/pkce_client.py +481 -481
  42. teradataml/common/aed_utils.py +7 -2
  43. teradataml/common/bulk_exposed_utils.py +111 -111
  44. teradataml/common/constants.py +1438 -1441
  45. teradataml/common/deprecations.py +160 -0
  46. teradataml/common/exceptions.py +73 -73
  47. teradataml/common/formula.py +742 -742
  48. teradataml/common/garbagecollector.py +597 -635
  49. teradataml/common/messagecodes.py +424 -431
  50. teradataml/common/messages.py +228 -231
  51. teradataml/common/sqlbundle.py +693 -693
  52. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  53. teradataml/common/utils.py +2424 -2500
  54. teradataml/common/warnings.py +25 -25
  55. teradataml/common/wrapper_utils.py +1 -110
  56. teradataml/config/dummy_file1.cfg +4 -4
  57. teradataml/config/dummy_file2.cfg +2 -2
  58. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  59. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  60. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  61. teradataml/context/aed_context.py +217 -217
  62. teradataml/context/context.py +1091 -999
  63. teradataml/data/A_loan.csv +19 -19
  64. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  65. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  66. teradataml/data/B_loan.csv +49 -49
  67. teradataml/data/BuoyData2.csv +17 -17
  68. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  69. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  70. teradataml/data/Convolve2RealsLeft.csv +5 -5
  71. teradataml/data/Convolve2RealsRight.csv +5 -5
  72. teradataml/data/Convolve2ValidLeft.csv +11 -11
  73. teradataml/data/Convolve2ValidRight.csv +11 -11
  74. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  75. teradataml/data/Orders1_12mf.csv +24 -24
  76. teradataml/data/Pi_loan.csv +7 -7
  77. teradataml/data/SMOOTHED_DATA.csv +7 -7
  78. teradataml/data/TestDFFT8.csv +9 -9
  79. teradataml/data/TestRiver.csv +109 -109
  80. teradataml/data/Traindata.csv +28 -28
  81. teradataml/data/acf.csv +17 -17
  82. teradataml/data/adaboost_example.json +34 -34
  83. teradataml/data/adaboostpredict_example.json +24 -24
  84. teradataml/data/additional_table.csv +10 -10
  85. teradataml/data/admissions_test.csv +21 -21
  86. teradataml/data/admissions_train.csv +41 -41
  87. teradataml/data/admissions_train_nulls.csv +41 -41
  88. teradataml/data/advertising.csv +201 -0
  89. teradataml/data/ageandheight.csv +13 -13
  90. teradataml/data/ageandpressure.csv +31 -31
  91. teradataml/data/antiselect_example.json +36 -36
  92. teradataml/data/antiselect_input.csv +8 -8
  93. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  94. teradataml/data/applicant_external.csv +6 -6
  95. teradataml/data/applicant_reference.csv +6 -6
  96. teradataml/data/arima_example.json +9 -9
  97. teradataml/data/assortedtext_input.csv +8 -8
  98. teradataml/data/attribution_example.json +33 -33
  99. teradataml/data/attribution_sample_table.csv +27 -27
  100. teradataml/data/attribution_sample_table1.csv +6 -6
  101. teradataml/data/attribution_sample_table2.csv +11 -11
  102. teradataml/data/bank_churn.csv +10001 -0
  103. teradataml/data/bank_marketing.csv +11163 -0
  104. teradataml/data/bank_web_clicks1.csv +42 -42
  105. teradataml/data/bank_web_clicks2.csv +91 -91
  106. teradataml/data/bank_web_url.csv +85 -85
  107. teradataml/data/barrier.csv +2 -2
  108. teradataml/data/barrier_new.csv +3 -3
  109. teradataml/data/betweenness_example.json +13 -13
  110. teradataml/data/bike_sharing.csv +732 -0
  111. teradataml/data/bin_breaks.csv +8 -8
  112. teradataml/data/bin_fit_ip.csv +3 -3
  113. teradataml/data/binary_complex_left.csv +11 -11
  114. teradataml/data/binary_complex_right.csv +11 -11
  115. teradataml/data/binary_matrix_complex_left.csv +21 -21
  116. teradataml/data/binary_matrix_complex_right.csv +21 -21
  117. teradataml/data/binary_matrix_real_left.csv +21 -21
  118. teradataml/data/binary_matrix_real_right.csv +21 -21
  119. teradataml/data/blood2ageandweight.csv +26 -26
  120. teradataml/data/bmi.csv +501 -0
  121. teradataml/data/boston.csv +507 -507
  122. teradataml/data/boston2cols.csv +721 -0
  123. teradataml/data/breast_cancer.csv +570 -0
  124. teradataml/data/buoydata_mix.csv +11 -11
  125. teradataml/data/burst_data.csv +5 -5
  126. teradataml/data/burst_example.json +20 -20
  127. teradataml/data/byom_example.json +17 -17
  128. teradataml/data/bytes_table.csv +3 -3
  129. teradataml/data/cal_housing_ex_raw.csv +70 -70
  130. teradataml/data/callers.csv +7 -7
  131. teradataml/data/calls.csv +10 -10
  132. teradataml/data/cars_hist.csv +33 -33
  133. teradataml/data/cat_table.csv +24 -24
  134. teradataml/data/ccm_example.json +31 -31
  135. teradataml/data/ccm_input.csv +91 -91
  136. teradataml/data/ccm_input2.csv +13 -13
  137. teradataml/data/ccmexample.csv +101 -101
  138. teradataml/data/ccmprepare_example.json +8 -8
  139. teradataml/data/ccmprepare_input.csv +91 -91
  140. teradataml/data/cfilter_example.json +12 -12
  141. teradataml/data/changepointdetection_example.json +18 -18
  142. teradataml/data/changepointdetectionrt_example.json +8 -8
  143. teradataml/data/chi_sq.csv +2 -2
  144. teradataml/data/churn_data.csv +14 -14
  145. teradataml/data/churn_emission.csv +35 -35
  146. teradataml/data/churn_initial.csv +3 -3
  147. teradataml/data/churn_state_transition.csv +5 -5
  148. teradataml/data/citedges_2.csv +745 -745
  149. teradataml/data/citvertices_2.csv +1210 -1210
  150. teradataml/data/clicks2.csv +16 -16
  151. teradataml/data/clickstream.csv +12 -12
  152. teradataml/data/clickstream1.csv +11 -11
  153. teradataml/data/closeness_example.json +15 -15
  154. teradataml/data/complaints.csv +21 -21
  155. teradataml/data/complaints_mini.csv +3 -3
  156. teradataml/data/complaints_testtoken.csv +224 -224
  157. teradataml/data/complaints_tokens_test.csv +353 -353
  158. teradataml/data/complaints_traintoken.csv +472 -472
  159. teradataml/data/computers_category.csv +1001 -1001
  160. teradataml/data/computers_test1.csv +1252 -1252
  161. teradataml/data/computers_train1.csv +5009 -5009
  162. teradataml/data/computers_train1_clustered.csv +5009 -5009
  163. teradataml/data/confusionmatrix_example.json +9 -9
  164. teradataml/data/conversion_event_table.csv +3 -3
  165. teradataml/data/corr_input.csv +17 -17
  166. teradataml/data/correlation_example.json +11 -11
  167. teradataml/data/coxhazardratio_example.json +39 -39
  168. teradataml/data/coxph_example.json +15 -15
  169. teradataml/data/coxsurvival_example.json +28 -28
  170. teradataml/data/cpt.csv +41 -41
  171. teradataml/data/credit_ex_merged.csv +45 -45
  172. teradataml/data/customer_loyalty.csv +301 -301
  173. teradataml/data/customer_loyalty_newseq.csv +31 -31
  174. teradataml/data/customer_segmentation_test.csv +2628 -0
  175. teradataml/data/customer_segmentation_train.csv +8069 -0
  176. teradataml/data/dataframe_example.json +146 -146
  177. teradataml/data/decisionforest_example.json +37 -37
  178. teradataml/data/decisionforestpredict_example.json +38 -38
  179. teradataml/data/decisiontree_example.json +21 -21
  180. teradataml/data/decisiontreepredict_example.json +45 -45
  181. teradataml/data/dfft2_size4_real.csv +17 -17
  182. teradataml/data/dfft2_test_matrix16.csv +17 -17
  183. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  184. teradataml/data/diabetes.csv +443 -443
  185. teradataml/data/diabetes_test.csv +89 -89
  186. teradataml/data/dict_table.csv +5 -5
  187. teradataml/data/docperterm_table.csv +4 -4
  188. teradataml/data/docs/__init__.py +1 -1
  189. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  190. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  191. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  192. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  193. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  194. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  195. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  196. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  197. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  198. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  199. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  200. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  201. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  202. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  203. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  204. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  205. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  206. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  207. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  208. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  209. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  210. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  211. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  212. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  213. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  214. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  215. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  216. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  217. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
  218. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
  219. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  220. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
  221. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  222. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  223. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  224. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  225. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  226. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  227. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  228. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  229. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  230. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  231. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  232. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  233. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  234. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  235. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  236. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  237. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  238. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  239. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  240. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  241. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  242. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
  243. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  244. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  245. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  246. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  247. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  248. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  249. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  250. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  251. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
  252. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  253. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  254. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  255. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  256. teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
  257. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  258. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  259. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
  260. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  261. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  262. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  263. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
  264. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  265. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  266. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  267. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
  268. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  269. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  270. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  271. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  272. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  273. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  274. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  275. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  276. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
  277. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  278. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  279. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  280. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  281. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
  282. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
  283. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  284. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  285. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  286. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  287. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  288. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  289. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  290. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  291. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
  292. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  293. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  294. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  295. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  296. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  297. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  298. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  299. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  300. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
  301. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  302. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
  303. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
  304. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  305. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  306. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  307. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  308. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  309. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  310. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  311. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  312. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
  313. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  314. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  315. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  316. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  317. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  318. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  319. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  320. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  321. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  322. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  323. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  324. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
  325. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
  326. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
  327. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  328. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  329. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  330. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  331. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  332. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  333. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  334. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  335. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  336. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  337. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  338. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  339. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  340. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  341. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  342. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  343. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  344. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  345. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  346. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  347. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  348. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  349. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  350. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  351. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  352. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  353. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  354. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  355. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  356. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  357. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  358. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  359. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  360. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  361. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  362. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  363. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  364. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  365. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  366. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  367. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  368. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  369. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  370. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  371. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  372. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  373. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  374. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  375. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  376. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  377. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  378. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  379. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  380. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  381. teradataml/data/dtw_example.json +17 -17
  382. teradataml/data/dtw_t1.csv +11 -11
  383. teradataml/data/dtw_t2.csv +4 -4
  384. teradataml/data/dwt2d_example.json +15 -15
  385. teradataml/data/dwt_example.json +14 -14
  386. teradataml/data/dwt_filter_dim.csv +5 -5
  387. teradataml/data/emission.csv +9 -9
  388. teradataml/data/emp_table_by_dept.csv +19 -19
  389. teradataml/data/employee_info.csv +4 -4
  390. teradataml/data/employee_table.csv +6 -6
  391. teradataml/data/excluding_event_table.csv +2 -2
  392. teradataml/data/finance_data.csv +6 -6
  393. teradataml/data/finance_data2.csv +61 -61
  394. teradataml/data/finance_data3.csv +93 -93
  395. teradataml/data/fish.csv +160 -0
  396. teradataml/data/fm_blood2ageandweight.csv +26 -26
  397. teradataml/data/fmeasure_example.json +11 -11
  398. teradataml/data/followers_leaders.csv +10 -10
  399. teradataml/data/fpgrowth_example.json +12 -12
  400. teradataml/data/frequentpaths_example.json +29 -29
  401. teradataml/data/friends.csv +9 -9
  402. teradataml/data/fs_input.csv +33 -33
  403. teradataml/data/fs_input1.csv +33 -33
  404. teradataml/data/genData.csv +513 -513
  405. teradataml/data/geodataframe_example.json +39 -39
  406. teradataml/data/glass_types.csv +215 -0
  407. teradataml/data/glm_admissions_model.csv +12 -12
  408. teradataml/data/glm_example.json +56 -29
  409. teradataml/data/glml1l2_example.json +28 -28
  410. teradataml/data/glml1l2predict_example.json +54 -54
  411. teradataml/data/glmpredict_example.json +54 -54
  412. teradataml/data/gq_t1.csv +21 -21
  413. teradataml/data/hconvolve_complex_right.csv +5 -5
  414. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  415. teradataml/data/histogram_example.json +11 -11
  416. teradataml/data/hmmdecoder_example.json +78 -78
  417. teradataml/data/hmmevaluator_example.json +24 -24
  418. teradataml/data/hmmsupervised_example.json +10 -10
  419. teradataml/data/hmmunsupervised_example.json +7 -7
  420. teradataml/data/house_values.csv +12 -12
  421. teradataml/data/house_values2.csv +13 -13
  422. teradataml/data/housing_cat.csv +7 -7
  423. teradataml/data/housing_data.csv +9 -9
  424. teradataml/data/housing_test.csv +47 -47
  425. teradataml/data/housing_test_binary.csv +47 -47
  426. teradataml/data/housing_train.csv +493 -493
  427. teradataml/data/housing_train_attribute.csv +4 -4
  428. teradataml/data/housing_train_binary.csv +437 -437
  429. teradataml/data/housing_train_parameter.csv +2 -2
  430. teradataml/data/housing_train_response.csv +493 -493
  431. teradataml/data/housing_train_segment.csv +201 -0
  432. teradataml/data/ibm_stock.csv +370 -370
  433. teradataml/data/ibm_stock1.csv +370 -370
  434. teradataml/data/identitymatch_example.json +21 -21
  435. teradataml/data/idf_table.csv +4 -4
  436. teradataml/data/impressions.csv +101 -101
  437. teradataml/data/inflation.csv +21 -21
  438. teradataml/data/initial.csv +3 -3
  439. teradataml/data/insect2Cols.csv +61 -0
  440. teradataml/data/insect_sprays.csv +12 -12
  441. teradataml/data/insurance.csv +1339 -1339
  442. teradataml/data/interpolator_example.json +12 -12
  443. teradataml/data/iris_altinput.csv +481 -481
  444. teradataml/data/iris_attribute_output.csv +8 -8
  445. teradataml/data/iris_attribute_test.csv +121 -121
  446. teradataml/data/iris_attribute_train.csv +481 -481
  447. teradataml/data/iris_category_expect_predict.csv +31 -31
  448. teradataml/data/iris_data.csv +151 -0
  449. teradataml/data/iris_input.csv +151 -151
  450. teradataml/data/iris_response_train.csv +121 -121
  451. teradataml/data/iris_test.csv +31 -31
  452. teradataml/data/iris_train.csv +121 -121
  453. teradataml/data/join_table1.csv +4 -4
  454. teradataml/data/join_table2.csv +4 -4
  455. teradataml/data/jsons/anly_function_name.json +6 -6
  456. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  457. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  458. teradataml/data/jsons/byom/h2opredict.json +194 -194
  459. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  460. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  461. teradataml/data/jsons/paired_functions.json +435 -435
  462. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  463. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  464. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  465. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  466. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  467. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  468. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  469. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  470. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  471. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  472. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  473. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  474. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  475. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  476. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  477. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  478. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  479. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  480. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  481. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  482. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  483. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  484. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  485. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  486. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  487. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  488. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  489. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  490. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  491. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  492. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  493. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  494. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  495. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  496. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  497. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  498. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  499. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  500. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  501. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  502. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  503. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  504. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  505. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  506. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  507. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  508. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  509. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  510. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  511. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  512. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  513. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  514. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  515. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  516. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  517. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  518. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  519. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  520. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  521. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  522. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  523. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  524. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  525. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  526. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  527. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  528. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  529. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  531. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  532. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  533. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  534. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  535. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  536. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  537. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  539. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  540. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  541. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  542. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  543. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  544. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  545. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  546. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  547. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  548. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  549. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  550. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  551. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  552. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  553. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  554. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  555. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  556. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  557. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  558. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  559. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  560. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  561. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  562. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  563. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  564. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  565. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  566. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  567. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  568. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
  569. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  570. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  571. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  572. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  573. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  574. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  575. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  576. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  577. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  578. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  579. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
  580. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  581. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  582. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  583. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
  584. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
  585. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  586. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  587. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
  588. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  589. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  590. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  591. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
  592. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  593. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  594. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  595. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
  596. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  597. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  598. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  599. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  600. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  601. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  602. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  603. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  604. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  605. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  606. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  607. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  608. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  609. teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
  610. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  611. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  612. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  613. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  614. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  615. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  616. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  617. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  618. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
  619. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
  620. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
  621. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  622. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  623. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  624. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  625. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  626. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  627. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  628. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  629. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  630. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  631. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  632. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  633. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  634. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  635. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
  636. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
  637. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
  638. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  639. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  640. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  641. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  642. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  643. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  644. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  645. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  646. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  647. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  648. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  649. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  650. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  651. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  653. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  654. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  655. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  656. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  657. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  658. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  659. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  660. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  661. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  662. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  663. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  664. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  665. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  666. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  667. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  668. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  669. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  670. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  671. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  672. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  673. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  674. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  675. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  676. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  677. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  678. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  679. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  680. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  681. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  682. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  683. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  684. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  685. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  686. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  687. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  688. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  689. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  690. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  691. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  692. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  693. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  694. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  695. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  696. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  697. teradataml/data/kmeans_example.json +22 -17
  698. teradataml/data/kmeans_table.csv +10 -0
  699. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  700. teradataml/data/knn_example.json +18 -18
  701. teradataml/data/knnrecommender_example.json +6 -6
  702. teradataml/data/knnrecommenderpredict_example.json +12 -12
  703. teradataml/data/lar_example.json +17 -17
  704. teradataml/data/larpredict_example.json +30 -30
  705. teradataml/data/lc_new_predictors.csv +5 -5
  706. teradataml/data/lc_new_reference.csv +9 -9
  707. teradataml/data/lda_example.json +8 -8
  708. teradataml/data/ldainference_example.json +14 -14
  709. teradataml/data/ldatopicsummary_example.json +8 -8
  710. teradataml/data/levendist_input.csv +13 -13
  711. teradataml/data/levenshteindistance_example.json +10 -10
  712. teradataml/data/linreg_example.json +9 -9
  713. teradataml/data/load_example_data.py +326 -323
  714. teradataml/data/loan_prediction.csv +295 -295
  715. teradataml/data/lungcancer.csv +138 -138
  716. teradataml/data/mappingdata.csv +12 -12
  717. teradataml/data/milk_timeseries.csv +157 -157
  718. teradataml/data/min_max_titanic.csv +4 -4
  719. teradataml/data/minhash_example.json +6 -6
  720. teradataml/data/ml_ratings.csv +7547 -7547
  721. teradataml/data/ml_ratings_10.csv +2445 -2445
  722. teradataml/data/model1_table.csv +5 -5
  723. teradataml/data/model2_table.csv +5 -5
  724. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  725. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  726. teradataml/data/modularity_example.json +12 -12
  727. teradataml/data/movavg_example.json +7 -7
  728. teradataml/data/mtx1.csv +7 -7
  729. teradataml/data/mtx2.csv +13 -13
  730. teradataml/data/multi_model_classification.csv +401 -0
  731. teradataml/data/multi_model_regression.csv +401 -0
  732. teradataml/data/mvdfft8.csv +9 -9
  733. teradataml/data/naivebayes_example.json +9 -9
  734. teradataml/data/naivebayespredict_example.json +19 -19
  735. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  736. teradataml/data/naivebayestextclassifier_example.json +8 -8
  737. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  738. teradataml/data/name_Find_configure.csv +10 -10
  739. teradataml/data/namedentityfinder_example.json +14 -14
  740. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  741. teradataml/data/namedentityfindertrainer_example.json +6 -6
  742. teradataml/data/nb_iris_input_test.csv +31 -31
  743. teradataml/data/nb_iris_input_train.csv +121 -121
  744. teradataml/data/nbp_iris_model.csv +13 -13
  745. teradataml/data/ner_extractor_text.csv +2 -2
  746. teradataml/data/ner_sports_test2.csv +29 -29
  747. teradataml/data/ner_sports_train.csv +501 -501
  748. teradataml/data/nerevaluator_example.json +5 -5
  749. teradataml/data/nerextractor_example.json +18 -18
  750. teradataml/data/nermem_sports_test.csv +17 -17
  751. teradataml/data/nermem_sports_train.csv +50 -50
  752. teradataml/data/nertrainer_example.json +6 -6
  753. teradataml/data/ngrams_example.json +6 -6
  754. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  755. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  756. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  757. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  758. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  759. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  760. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  761. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  762. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  763. teradataml/data/npath_example.json +23 -23
  764. teradataml/data/ntree_example.json +14 -14
  765. teradataml/data/numeric_strings.csv +4 -4
  766. teradataml/data/numerics.csv +4 -4
  767. teradataml/data/ocean_buoy.csv +17 -17
  768. teradataml/data/ocean_buoy2.csv +17 -17
  769. teradataml/data/ocean_buoys.csv +27 -27
  770. teradataml/data/ocean_buoys2.csv +10 -10
  771. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  772. teradataml/data/ocean_buoys_seq.csv +29 -29
  773. teradataml/data/onehot_encoder_train.csv +4 -0
  774. teradataml/data/openml_example.json +92 -0
  775. teradataml/data/optional_event_table.csv +4 -4
  776. teradataml/data/orders1.csv +11 -11
  777. teradataml/data/orders1_12.csv +12 -12
  778. teradataml/data/orders_ex.csv +4 -4
  779. teradataml/data/pack_example.json +8 -8
  780. teradataml/data/package_tracking.csv +19 -19
  781. teradataml/data/package_tracking_pti.csv +18 -18
  782. teradataml/data/pagerank_example.json +13 -13
  783. teradataml/data/paragraphs_input.csv +6 -6
  784. teradataml/data/pathanalyzer_example.json +7 -7
  785. teradataml/data/pathgenerator_example.json +7 -7
  786. teradataml/data/phrases.csv +7 -7
  787. teradataml/data/pivot_example.json +8 -8
  788. teradataml/data/pivot_input.csv +22 -22
  789. teradataml/data/playerRating.csv +31 -31
  790. teradataml/data/postagger_example.json +6 -6
  791. teradataml/data/posttagger_output.csv +44 -44
  792. teradataml/data/production_data.csv +16 -16
  793. teradataml/data/production_data2.csv +7 -7
  794. teradataml/data/randomsample_example.json +31 -31
  795. teradataml/data/randomwalksample_example.json +8 -8
  796. teradataml/data/rank_table.csv +6 -6
  797. teradataml/data/ref_mobile_data.csv +4 -4
  798. teradataml/data/ref_mobile_data_dense.csv +2 -2
  799. teradataml/data/ref_url.csv +17 -17
  800. teradataml/data/restaurant_reviews.csv +7 -7
  801. teradataml/data/river_data.csv +145 -145
  802. teradataml/data/roc_example.json +7 -7
  803. teradataml/data/roc_input.csv +101 -101
  804. teradataml/data/rule_inputs.csv +6 -6
  805. teradataml/data/rule_table.csv +2 -2
  806. teradataml/data/sales.csv +7 -7
  807. teradataml/data/sales_transaction.csv +501 -501
  808. teradataml/data/salesdata.csv +342 -342
  809. teradataml/data/sample_cities.csv +2 -2
  810. teradataml/data/sample_shapes.csv +10 -10
  811. teradataml/data/sample_streets.csv +2 -2
  812. teradataml/data/sampling_example.json +15 -15
  813. teradataml/data/sax_example.json +8 -8
  814. teradataml/data/scale_attributes.csv +3 -0
  815. teradataml/data/scale_example.json +74 -23
  816. teradataml/data/scale_housing.csv +11 -11
  817. teradataml/data/scale_housing_test.csv +6 -6
  818. teradataml/data/scale_input_part_sparse.csv +31 -0
  819. teradataml/data/scale_input_partitioned.csv +16 -0
  820. teradataml/data/scale_input_sparse.csv +11 -0
  821. teradataml/data/scale_parameters.csv +3 -0
  822. teradataml/data/scale_stat.csv +11 -11
  823. teradataml/data/scalebypartition_example.json +13 -13
  824. teradataml/data/scalemap_example.json +13 -13
  825. teradataml/data/scalesummary_example.json +12 -12
  826. teradataml/data/score_category.csv +101 -101
  827. teradataml/data/score_summary.csv +4 -4
  828. teradataml/data/script_example.json +9 -9
  829. teradataml/data/scripts/deploy_script.py +84 -0
  830. teradataml/data/scripts/mapper.R +20 -0
  831. teradataml/data/scripts/mapper.py +15 -15
  832. teradataml/data/scripts/mapper_replace.py +15 -15
  833. teradataml/data/scripts/sklearn/__init__.py +0 -0
  834. teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
  835. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
  836. teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
  837. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
  838. teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
  839. teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
  840. teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
  841. teradataml/data/seeds.csv +10 -10
  842. teradataml/data/sentenceextractor_example.json +6 -6
  843. teradataml/data/sentiment_extract_input.csv +11 -11
  844. teradataml/data/sentiment_train.csv +16 -16
  845. teradataml/data/sentiment_word.csv +20 -20
  846. teradataml/data/sentiment_word_input.csv +19 -19
  847. teradataml/data/sentimentextractor_example.json +24 -24
  848. teradataml/data/sentimenttrainer_example.json +8 -8
  849. teradataml/data/sequence_table.csv +10 -10
  850. teradataml/data/seriessplitter_example.json +7 -7
  851. teradataml/data/sessionize_example.json +17 -17
  852. teradataml/data/sessionize_table.csv +116 -116
  853. teradataml/data/setop_test1.csv +24 -24
  854. teradataml/data/setop_test2.csv +22 -22
  855. teradataml/data/soc_nw_edges.csv +10 -10
  856. teradataml/data/soc_nw_vertices.csv +7 -7
  857. teradataml/data/souvenir_timeseries.csv +167 -167
  858. teradataml/data/sparse_iris_attribute.csv +5 -5
  859. teradataml/data/sparse_iris_test.csv +121 -121
  860. teradataml/data/sparse_iris_train.csv +601 -601
  861. teradataml/data/star1.csv +6 -6
  862. teradataml/data/state_transition.csv +5 -5
  863. teradataml/data/stock_data.csv +53 -53
  864. teradataml/data/stock_movement.csv +11 -11
  865. teradataml/data/stock_vol.csv +76 -76
  866. teradataml/data/stop_words.csv +8 -8
  867. teradataml/data/store_sales.csv +37 -37
  868. teradataml/data/stringsimilarity_example.json +7 -7
  869. teradataml/data/strsimilarity_input.csv +13 -13
  870. teradataml/data/students.csv +101 -101
  871. teradataml/data/svm_iris_input_test.csv +121 -121
  872. teradataml/data/svm_iris_input_train.csv +481 -481
  873. teradataml/data/svm_iris_model.csv +7 -7
  874. teradataml/data/svmdense_example.json +9 -9
  875. teradataml/data/svmdensepredict_example.json +18 -18
  876. teradataml/data/svmsparse_example.json +7 -7
  877. teradataml/data/svmsparsepredict_example.json +13 -13
  878. teradataml/data/svmsparsesummary_example.json +7 -7
  879. teradataml/data/target_mobile_data.csv +13 -13
  880. teradataml/data/target_mobile_data_dense.csv +5 -5
  881. teradataml/data/templatedata.csv +1201 -1201
  882. teradataml/data/templates/open_source_ml.json +9 -0
  883. teradataml/data/teradataml_example.json +150 -1
  884. teradataml/data/test_classification.csv +101 -0
  885. teradataml/data/test_loan_prediction.csv +53 -53
  886. teradataml/data/test_pacf_12.csv +37 -37
  887. teradataml/data/test_prediction.csv +101 -0
  888. teradataml/data/test_regression.csv +101 -0
  889. teradataml/data/test_river2.csv +109 -109
  890. teradataml/data/text_inputs.csv +6 -6
  891. teradataml/data/textchunker_example.json +7 -7
  892. teradataml/data/textclassifier_example.json +6 -6
  893. teradataml/data/textclassifier_input.csv +7 -7
  894. teradataml/data/textclassifiertrainer_example.json +6 -6
  895. teradataml/data/textmorph_example.json +5 -5
  896. teradataml/data/textparser_example.json +15 -15
  897. teradataml/data/texttagger_example.json +11 -11
  898. teradataml/data/texttokenizer_example.json +6 -6
  899. teradataml/data/texttrainer_input.csv +11 -11
  900. teradataml/data/tf_example.json +6 -6
  901. teradataml/data/tfidf_example.json +13 -13
  902. teradataml/data/tfidf_input1.csv +201 -201
  903. teradataml/data/tfidf_train.csv +6 -6
  904. teradataml/data/time_table1.csv +535 -535
  905. teradataml/data/time_table2.csv +14 -14
  906. teradataml/data/timeseriesdata.csv +1601 -1601
  907. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  908. teradataml/data/titanic.csv +892 -892
  909. teradataml/data/token_table.csv +696 -696
  910. teradataml/data/train_multiclass.csv +101 -0
  911. teradataml/data/train_regression.csv +101 -0
  912. teradataml/data/train_regression_multiple_labels.csv +101 -0
  913. teradataml/data/train_tracking.csv +27 -27
  914. teradataml/data/transformation_table.csv +5 -5
  915. teradataml/data/transformation_table_new.csv +1 -1
  916. teradataml/data/tv_spots.csv +16 -16
  917. teradataml/data/twod_climate_data.csv +117 -117
  918. teradataml/data/uaf_example.json +475 -475
  919. teradataml/data/univariatestatistics_example.json +8 -8
  920. teradataml/data/unpack_example.json +9 -9
  921. teradataml/data/unpivot_example.json +9 -9
  922. teradataml/data/unpivot_input.csv +8 -8
  923. teradataml/data/us_air_pass.csv +36 -36
  924. teradataml/data/us_population.csv +624 -624
  925. teradataml/data/us_states_shapes.csv +52 -52
  926. teradataml/data/varmax_example.json +17 -17
  927. teradataml/data/vectordistance_example.json +25 -25
  928. teradataml/data/ville_climatedata.csv +121 -121
  929. teradataml/data/ville_tempdata.csv +12 -12
  930. teradataml/data/ville_tempdata1.csv +12 -12
  931. teradataml/data/ville_temperature.csv +11 -11
  932. teradataml/data/waveletTable.csv +1605 -1605
  933. teradataml/data/waveletTable2.csv +1605 -1605
  934. teradataml/data/weightedmovavg_example.json +8 -8
  935. teradataml/data/wft_testing.csv +5 -5
  936. teradataml/data/wine_data.csv +1600 -0
  937. teradataml/data/word_embed_input_table1.csv +5 -5
  938. teradataml/data/word_embed_input_table2.csv +4 -4
  939. teradataml/data/word_embed_model.csv +22 -22
  940. teradataml/data/words_input.csv +13 -13
  941. teradataml/data/xconvolve_complex_left.csv +6 -6
  942. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  943. teradataml/data/xgboost_example.json +35 -35
  944. teradataml/data/xgboostpredict_example.json +31 -31
  945. teradataml/data/ztest_example.json +16 -0
  946. teradataml/dataframe/copy_to.py +1769 -1698
  947. teradataml/dataframe/data_transfer.py +2812 -2745
  948. teradataml/dataframe/dataframe.py +17630 -16946
  949. teradataml/dataframe/dataframe_utils.py +1875 -1740
  950. teradataml/dataframe/fastload.py +794 -603
  951. teradataml/dataframe/indexer.py +424 -424
  952. teradataml/dataframe/setop.py +1179 -1166
  953. teradataml/dataframe/sql.py +10174 -6432
  954. teradataml/dataframe/sql_function_parameters.py +439 -388
  955. teradataml/dataframe/sql_functions.py +652 -652
  956. teradataml/dataframe/sql_interfaces.py +220 -220
  957. teradataml/dataframe/vantage_function_types.py +674 -630
  958. teradataml/dataframe/window.py +693 -692
  959. teradataml/dbutils/__init__.py +3 -3
  960. teradataml/dbutils/dbutils.py +1167 -1150
  961. teradataml/dbutils/filemgr.py +267 -267
  962. teradataml/gen_ai/__init__.py +2 -2
  963. teradataml/gen_ai/convAI.py +472 -472
  964. teradataml/geospatial/__init__.py +3 -3
  965. teradataml/geospatial/geodataframe.py +1105 -1094
  966. teradataml/geospatial/geodataframecolumn.py +392 -387
  967. teradataml/geospatial/geometry_types.py +925 -925
  968. teradataml/hyperparameter_tuner/__init__.py +1 -1
  969. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  970. teradataml/hyperparameter_tuner/utils.py +281 -187
  971. teradataml/lib/aed_0_1.dll +0 -0
  972. teradataml/lib/libaed_0_1.dylib +0 -0
  973. teradataml/lib/libaed_0_1.so +0 -0
  974. teradataml/libaed_0_1.dylib +0 -0
  975. teradataml/libaed_0_1.so +0 -0
  976. teradataml/opensource/__init__.py +1 -0
  977. teradataml/opensource/sklearn/__init__.py +1 -0
  978. teradataml/opensource/sklearn/_class.py +255 -0
  979. teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
  980. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  981. teradataml/opensource/sklearn/constants.py +54 -0
  982. teradataml/options/__init__.py +130 -124
  983. teradataml/options/configure.py +358 -336
  984. teradataml/options/display.py +176 -176
  985. teradataml/plot/__init__.py +2 -2
  986. teradataml/plot/axis.py +1388 -1388
  987. teradataml/plot/constants.py +15 -15
  988. teradataml/plot/figure.py +398 -398
  989. teradataml/plot/plot.py +760 -760
  990. teradataml/plot/query_generator.py +83 -83
  991. teradataml/plot/subplot.py +216 -216
  992. teradataml/scriptmgmt/UserEnv.py +3791 -3761
  993. teradataml/scriptmgmt/__init__.py +3 -3
  994. teradataml/scriptmgmt/lls_utils.py +1719 -1604
  995. teradataml/series/series.py +532 -532
  996. teradataml/series/series_utils.py +71 -71
  997. teradataml/table_operators/Apply.py +949 -917
  998. teradataml/table_operators/Script.py +1718 -1982
  999. teradataml/table_operators/TableOperator.py +1255 -1616
  1000. teradataml/table_operators/__init__.py +2 -3
  1001. teradataml/table_operators/apply_query_generator.py +262 -262
  1002. teradataml/table_operators/query_generator.py +507 -507
  1003. teradataml/table_operators/table_operator_query_generator.py +460 -460
  1004. teradataml/table_operators/table_operator_util.py +631 -639
  1005. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  1006. teradataml/table_operators/templates/dataframe_map.template +176 -176
  1007. teradataml/table_operators/templates/script_executor.template +170 -170
  1008. teradataml/utils/dtypes.py +684 -684
  1009. teradataml/utils/internal_buffer.py +84 -84
  1010. teradataml/utils/print_versions.py +205 -205
  1011. teradataml/utils/utils.py +410 -410
  1012. teradataml/utils/validators.py +2277 -2115
  1013. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
  1014. teradataml-20.0.0.1.dist-info/RECORD +1056 -0
  1015. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
  1016. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
  1017. teradataml/analytics/mle/AdaBoost.py +0 -651
  1018. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1019. teradataml/analytics/mle/Antiselect.py +0 -342
  1020. teradataml/analytics/mle/Arima.py +0 -641
  1021. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1022. teradataml/analytics/mle/Attribution.py +0 -1070
  1023. teradataml/analytics/mle/Betweenness.py +0 -658
  1024. teradataml/analytics/mle/Burst.py +0 -711
  1025. teradataml/analytics/mle/CCM.py +0 -600
  1026. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1027. teradataml/analytics/mle/CFilter.py +0 -460
  1028. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1029. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1030. teradataml/analytics/mle/Closeness.py +0 -737
  1031. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1032. teradataml/analytics/mle/Correlation.py +0 -477
  1033. teradataml/analytics/mle/Correlation2.py +0 -573
  1034. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1035. teradataml/analytics/mle/CoxPH.py +0 -556
  1036. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1037. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1038. teradataml/analytics/mle/DTW.py +0 -623
  1039. teradataml/analytics/mle/DWT.py +0 -564
  1040. teradataml/analytics/mle/DWT2D.py +0 -599
  1041. teradataml/analytics/mle/DecisionForest.py +0 -716
  1042. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1043. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1044. teradataml/analytics/mle/DecisionTree.py +0 -830
  1045. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1046. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1047. teradataml/analytics/mle/FMeasure.py +0 -402
  1048. teradataml/analytics/mle/FPGrowth.py +0 -734
  1049. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1050. teradataml/analytics/mle/GLM.py +0 -558
  1051. teradataml/analytics/mle/GLML1L2.py +0 -547
  1052. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1053. teradataml/analytics/mle/GLMPredict.py +0 -529
  1054. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1055. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1056. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1057. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1058. teradataml/analytics/mle/Histogram.py +0 -561
  1059. teradataml/analytics/mle/IDWT.py +0 -476
  1060. teradataml/analytics/mle/IDWT2D.py +0 -493
  1061. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1062. teradataml/analytics/mle/Interpolator.py +0 -918
  1063. teradataml/analytics/mle/KMeans.py +0 -485
  1064. teradataml/analytics/mle/KNN.py +0 -627
  1065. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1066. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1067. teradataml/analytics/mle/LAR.py +0 -439
  1068. teradataml/analytics/mle/LARPredict.py +0 -478
  1069. teradataml/analytics/mle/LDA.py +0 -548
  1070. teradataml/analytics/mle/LDAInference.py +0 -492
  1071. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1072. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1073. teradataml/analytics/mle/LinReg.py +0 -433
  1074. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1075. teradataml/analytics/mle/MinHash.py +0 -544
  1076. teradataml/analytics/mle/Modularity.py +0 -587
  1077. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1078. teradataml/analytics/mle/NERExtractor.py +0 -595
  1079. teradataml/analytics/mle/NERTrainer.py +0 -458
  1080. teradataml/analytics/mle/NGrams.py +0 -570
  1081. teradataml/analytics/mle/NPath.py +0 -634
  1082. teradataml/analytics/mle/NTree.py +0 -549
  1083. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1084. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1085. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1086. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1087. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1088. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1089. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1090. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1091. teradataml/analytics/mle/POSTagger.py +0 -417
  1092. teradataml/analytics/mle/Pack.py +0 -411
  1093. teradataml/analytics/mle/PageRank.py +0 -535
  1094. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1095. teradataml/analytics/mle/PathGenerator.py +0 -367
  1096. teradataml/analytics/mle/PathStart.py +0 -464
  1097. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1098. teradataml/analytics/mle/Pivot.py +0 -471
  1099. teradataml/analytics/mle/ROC.py +0 -425
  1100. teradataml/analytics/mle/RandomSample.py +0 -637
  1101. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1102. teradataml/analytics/mle/SAX.py +0 -779
  1103. teradataml/analytics/mle/SVMDense.py +0 -677
  1104. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1105. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1106. teradataml/analytics/mle/SVMSparse.py +0 -557
  1107. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1108. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1109. teradataml/analytics/mle/Sampling.py +0 -549
  1110. teradataml/analytics/mle/Scale.py +0 -565
  1111. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1112. teradataml/analytics/mle/ScaleMap.py +0 -378
  1113. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1114. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1115. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1116. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1117. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1118. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1119. teradataml/analytics/mle/Sessionize.py +0 -475
  1120. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1121. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1122. teradataml/analytics/mle/TF.py +0 -389
  1123. teradataml/analytics/mle/TFIDF.py +0 -504
  1124. teradataml/analytics/mle/TextChunker.py +0 -414
  1125. teradataml/analytics/mle/TextClassifier.py +0 -399
  1126. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1127. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1128. teradataml/analytics/mle/TextMorph.py +0 -494
  1129. teradataml/analytics/mle/TextParser.py +0 -623
  1130. teradataml/analytics/mle/TextTagger.py +0 -530
  1131. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1132. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1133. teradataml/analytics/mle/Unpack.py +0 -526
  1134. teradataml/analytics/mle/Unpivot.py +0 -438
  1135. teradataml/analytics/mle/VarMax.py +0 -776
  1136. teradataml/analytics/mle/VectorDistance.py +0 -762
  1137. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1138. teradataml/analytics/mle/XGBoost.py +0 -842
  1139. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1140. teradataml/analytics/mle/__init__.py +0 -123
  1141. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1142. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1143. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1144. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1145. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1146. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1147. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1148. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1149. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1150. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1151. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1152. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1153. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1154. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1155. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1156. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1157. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1158. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1159. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1160. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1161. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1162. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1163. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1164. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1165. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1166. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1167. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1168. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1169. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1170. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1171. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1172. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1173. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1174. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1175. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1176. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1177. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1178. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1179. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1180. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1181. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1182. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1183. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1184. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1185. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1186. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1187. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1188. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1189. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1190. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1191. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1192. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1193. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1194. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1195. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1196. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1197. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1198. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1199. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1200. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1201. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1202. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1203. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1204. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1205. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1206. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1207. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1208. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1209. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1210. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1211. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1212. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1213. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1214. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1215. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1216. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1217. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1218. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1219. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1220. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1221. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1222. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1223. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1224. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1225. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1226. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1227. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1228. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1229. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1230. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1231. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1232. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1233. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1234. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1235. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1236. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1237. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1238. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1239. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1240. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1241. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1242. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1243. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1244. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1245. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1246. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1247. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1248. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1249. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1250. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1251. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1252. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1253. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1254. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1255. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1256. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1257. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1258. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1259. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1260. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1261. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1262. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1263. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1264. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1265. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1266. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1267. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1268. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1269. teradataml/analytics/sqle/Antiselect.py +0 -321
  1270. teradataml/analytics/sqle/Attribution.py +0 -603
  1271. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1272. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1273. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1274. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1275. teradataml/analytics/sqle/NPath.py +0 -632
  1276. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1277. teradataml/analytics/sqle/Pack.py +0 -388
  1278. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1279. teradataml/analytics/sqle/Sessionize.py +0 -390
  1280. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1281. teradataml/analytics/sqle/Unpack.py +0 -503
  1282. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1283. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1284. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1285. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1286. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1287. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1288. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1289. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1290. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1291. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1292. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1293. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1294. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1295. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1296. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1297. teradataml/catalog/model_cataloging.py +0 -980
  1298. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1299. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1300. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1301. teradataml/table_operators/sandbox_container_util.py +0 -643
  1302. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1303. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
@@ -1,2010 +1,2040 @@
1
- """
2
- Unpublished work.
3
- Copyright (c) 2021 by Teradata Corporation. All rights reserved.
4
- TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
5
-
6
- Primary Owner: pradeep.garre@teradata.com
7
- Secondary Owner: PankajVinod.Purandare@teradata.com
8
-
9
- This file implements several classes which executes analytic functions such as
10
- SQLE functions and UAF functions on Vantage.
11
- File implements classes for following:
12
- * _AnlyticFunctionExecutor
13
- * _SQLEFunctionExecutor
14
- * _TableOperatorExecutor
15
- * _BYOMFunctionExecutor
16
- """
17
-
18
- from teradataml.options.configure import configure
19
- from teradataml.common.constants import TeradataConstants, TeradataAnalyticFunctionTypes
20
- from teradataml.analytics.json_parser import PartitionKind
21
- from teradataml.analytics.analytic_query_generator import AnalyticQueryGenerator, UAFQueryGenerator
22
- from teradataml.analytics.json_parser.json_store import _JsonStore
23
- from teradataml.analytics.utils import FuncSpecialCaseHandler
24
- from teradataml.options.display import display
25
- from teradataml.common.exceptions import TeradataMlException
26
- from teradataml.common.garbagecollector import GarbageCollector
27
- from teradataml.common.messages import Messages, MessageCodes
28
- from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
29
- from teradataml.common.utils import UtilFuncs
30
- from teradataml.context.context import _get_context_temp_databasename
31
- from teradataml.table_operators.table_operator_query_generator import TableOperatorQueryGenerator
32
- from teradataml.utils.dtypes import _ListOf
33
- from teradataml.utils.validators import _Validators
34
-
35
- import time
36
-
37
-
38
- class _AnlyticFunctionExecutor:
39
- """
40
- Class to hold the common attributes to execute analytic function.
41
- """
42
- def __init__(self, func_name, func_type):
43
- """
44
- DESCRIPTION:
45
- Constructor for the class.
46
-
47
- PARAMETERS:
48
- func_name:
49
- Required Argument.
50
- Specifies the name of the analytic function, which is exposed to user.
51
- Types: str
52
-
53
- func_type:
54
- Required Argument.
55
- Specifies whether the argument "func_name" is SQLE, UAF or Table Operator function.
56
- Types: str
57
-
58
- RAISES:
59
- TypeError OR ValueError OR TeradataMlException
60
- """
61
- self.func_name = func_name
62
- self._func_type = func_type
63
-
64
- # Input arguments passed, i.e., data members of the dynamic class to be generated.
65
- self._dyn_cls_data_members = {}
66
-
67
- # Output table arguments list
68
- self._func_output_args_sql_names = []
69
- self._func_output_args = []
70
- self._func_output_table_type = []
71
-
72
- # Generate lists for rest of the function arguments
73
- self._func_other_arg_sql_names = []
74
- self._func_other_args = []
75
- self._func_other_arg_json_datatypes = []
76
- self.sqlmr_query = None
77
- self._function_output_table_map = {}
78
- self._sql_specific_attributes = {}
79
- self._metadata = _JsonStore.get_function_metadata(self.func_name)
80
- self._mlresults = []
81
- self._awu = AnalyticsWrapperUtils()
82
- self.__build_time = None
83
- self._is_argument_dataframe = lambda object: type(object).__name__ == "DataFrame"
84
-
85
- # Initialize FuncSpecialCaseHandler.
86
- self._spl_func_obj = FuncSpecialCaseHandler(self.func_name)
87
-
88
- @staticmethod
89
- def _validate_analytic_function_argument(func_arg_name, func_arg_value, argument, additional_valid_types=None):
90
- """
91
- DESCRIPTION:
92
- Function to validate the analytic function arguments. This function does
93
- the following validations
94
- * Checks for missing mandatory argument.
95
- * Checks for the expected type for argument.
96
- * Checks for the expected values for argument.
97
- * Checks for empty value.
98
-
99
- PARAMETERS:
100
- func_arg_name:
101
- Required Argument.
102
- Specifies the name of the argument.
103
- Type: str
104
-
105
- func_arg_value:
106
- Required Argument.
107
- Specifies the value passed to argument 'func_arg_name' in analytic function.
108
- Type: str OR float OR int OR list
109
-
110
- argument:
111
- Required Argument.
112
- Specifies the argument object (_AnlyArgumentBase) containing argument
113
- information to be validated.
114
- Type: _AnlyFuncArgument OR _AnlyFuncInput
115
-
116
- RETURNS:
117
- None
118
-
119
- RAISES:
120
- ValueError OR TypeError
121
-
122
- EXAMPLES:
123
- self._validate_analytic_function_argument("arg", 1, metadata.arguments)
124
- """
125
- # Make sure that a non-NULL value has been supplied for all mandatory arguments
126
- py_types = argument.get_python_type()
127
- if additional_valid_types:
128
- if isinstance(additional_valid_types, tuple):
129
- py_types = (py_types, ) + additional_valid_types
130
- else:
131
- py_types = (py_types, additional_valid_types)
132
-
133
- argument_info = [func_arg_name,
134
- func_arg_value,
135
- not argument.is_required(),
136
- py_types
137
- ]
138
- _Validators._validate_missing_required_arguments([argument_info])
139
-
140
- # Validate for empty string if argument accepts a column name for either input or output.
141
- if not argument.is_empty_value_allowed() or argument.is_output_column():
142
- argument_info.append(True)
143
-
144
- # Validate the permitted values.
145
- if argument.get_permitted_values():
146
- if len(argument_info) == 4:
147
- argument_info.append(True)
148
- argument_info.append(argument.get_permitted_values())
149
-
150
- # Validate the function arguments.
151
- _Validators._validate_function_arguments([argument_info])
152
-
153
- def _execute_query(self, persist=False, volatile=False):
154
- """
155
- DESCRIPTION:
156
- Function to execute query on Vantage.
157
-
158
- PARAMETERS:
159
- persist:
160
- Optional Argument.
161
- Specifies whether to persist the result in a table or not.
162
- Default Value: False
163
- Type: bool
164
-
165
- volatile:
166
- Optional Argument.
167
- Specifies whether to create a volatile table or not.
168
- Default Value: False
169
- Type: bool
170
-
171
- RETURNS:
172
- None
173
-
174
- RAISES:
175
- None
176
-
177
- EXAMPLES:
178
- self._execute_query()
179
- """
180
- # Generate STDOUT table name and add it to the output table list.
181
- func_params = self._get_generate_temp_table_params(persist=persist, volatile=volatile)
182
- sqlmr_stdout_temp_tablename = UtilFuncs._generate_temp_table_name(**func_params)
183
-
184
- __execute = UtilFuncs._create_table
185
- __execute_params = (sqlmr_stdout_temp_tablename, self.sqlmr_query, volatile)
186
- if func_params["table_type"] == TeradataConstants.TERADATA_VIEW:
187
- __execute = UtilFuncs._create_view
188
- __execute_params = (sqlmr_stdout_temp_tablename, self.sqlmr_query)
189
-
190
- try:
191
- __execute(*__execute_params)
192
-
193
- # List stores names of the functions that will produce "output" attribute
194
- # when more than one results are expected.
195
- output_attr_functions = ["BincodeFit", "ChiSq", "PolynomialFeaturesFit",
196
- "RowNormalizeFit", "ScaleFit", "SimpleImputeFit"]
197
-
198
- # Store the result table in map.
199
- if self.func_name in output_attr_functions:
200
- self._function_output_table_map["output"] = sqlmr_stdout_temp_tablename
201
- else:
202
- self._function_output_table_map["result"] = sqlmr_stdout_temp_tablename
203
-
204
- if persist:
205
- # SQL is executed. So, print the table/view names.
206
- for output_attribute, table_name in self._function_output_table_map.items():
207
- print("{} data stored in table '{}'".format(output_attribute, table_name))
208
-
209
- except Exception as emsg:
210
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(emsg)),
211
- MessageCodes.TDMLDF_EXEC_SQL_FAILED)
212
-
213
- def _get_generate_temp_table_params(self, persist=False, volatile=False):
214
- """
215
- DESCRIPTION:
216
- Function to get the required parameters to create either table or view.
217
- When function has output table arguments or argument persist is set to True,
218
- then function returns parameters to create table otherwise returns parameters
219
- to create view. If persist is set to True or volatile is set to True, in such cases,
220
- tables created are not GC'ed.
221
-
222
- PARAMETERS:
223
- persist:
224
- Optional Argument.
225
- Specifies whether to persist the output table or not.
226
- When set to True, output tables created are not garbage collected
227
- at the end of the session, otherwise they are garbage collected.
228
- Default Value: False
229
- Types: bool
230
-
231
- volatile:
232
- Optional Argument.
233
- Specifies whether to create the output table as volatile table or not.
234
- When set to True, output tables created are garbage collected
235
- at the end of the session, otherwise they are not garbage collected.
236
- Default Value: False
237
- Types: bool
238
-
239
- RETURNS:
240
- dict
241
-
242
- RAISES:
243
- None
244
-
245
- EXAMPLES:
246
- self._get_generate_temp_table_params(True, True)
247
- """
248
- use_default_database = True
249
- db_object_type = TeradataConstants.TERADATA_VIEW
250
- prefix = "td_sqlmr_out_"
251
- gc_on_quit = True
252
-
253
- # If function produces output tables, i.e., function has output table arguments,
254
- # the 'db_object_type' should be "table" or if analytic function does not support
255
- # reading from a view created on output, then 'db_object_type' should be "table".
256
- if len(self._metadata.output_tables) > 0 or not self._metadata._is_view_supported:
257
- db_object_type = TeradataConstants.TERADATA_TABLE
258
-
259
- # If result is to be persisted or if the table is a volaile table then, db_object_type
260
- # should be "table" and it must not be Garbage collected.
261
- if persist or volatile:
262
- gc_on_quit = False
263
- db_object_type = TeradataConstants.TERADATA_TABLE
264
- prefix = "td_sqlmr_{}_out_".format("persist" if persist else "volatile")
265
-
266
- return {"use_default_database": use_default_database,
267
- "table_type": db_object_type,
268
- "prefix": prefix,
269
- "gc_on_quit": gc_on_quit}
270
-
271
- def _process_output_argument(self, persist=False, volatile=False, **kwargs):
272
- """
273
- DESCRIPTION:
274
- Function to process output argument(s) of analytic function.
275
-
276
- PARAMETERS:
277
- persist:
278
- Optional Argument.
279
- Specifies whether to persist the output table or not.
280
- When session is disconnected, if function is executed with persist
281
- set to False, then output tables are removed.
282
- When set to True, output tables created are not garbage collected
283
- at the end of the session, otherwise they are garbage collected.
284
- Default Value: False
285
- Types: bool
286
-
287
- volatile:
288
- Optional Argument.
289
- Specifies whether to create the output table as volatile table or not.
290
- When set to True, output tables created are garbage collected
291
- at the end of the session, otherwise they are not garbage collected.
292
- Default Value: False
293
- Types: bool
294
-
295
- kwargs:
296
- Specifies the keyword arguments passed to a function.
297
-
298
- RETURNS:
299
- None.
300
-
301
- RAISES:
302
- None.
303
-
304
- EXAMPLES:
305
- self._process_output_argument()
306
- """
307
- # Process the output_tables argument(s) of the metadata.
308
- for output_argument in self._metadata.output_tables:
309
- lang_name = output_argument.get_lang_name()
310
-
311
- # Generate the name of the table.
312
- func_params = self._get_generate_temp_table_params(persist=persist, volatile=volatile)
313
- temp_table_name = UtilFuncs._generate_temp_table_name(**func_params)
314
-
315
- # By default, populate the output table lists irrespective of 'is_required'. However,
316
- # if the output table has a dependent argument, then check for the dependent argument
317
- # value and decide whether to populate output table lists or not.
318
- populate_output_tables = True
319
- dependent_argument = output_argument.get_is_required_dependent_argument()
320
- if dependent_argument is not None:
321
- # Dependent argument can be input_tables or arguments or output_tables.
322
- # Get the analytic function arguments based on the argument type and
323
- # check whether dependenncy is satisfied or not.
324
- for arg in getattr(self._metadata, dependent_argument.type):
325
- if arg.get_sql_name() == dependent_argument.sql_name:
326
- lang_name = arg.get_lang_name()
327
- lang_name_val = kwargs.get(lang_name)
328
- if not dependent_argument.is_required(lang_name_val):
329
- populate_output_tables = False
330
- break
331
-
332
- if populate_output_tables:
333
- self._func_output_args_sql_names.append(output_argument.get_sql_name())
334
- self._func_output_args.append(temp_table_name)
335
- self._function_output_table_map[lang_name] = temp_table_name
336
-
337
- def _process_other_argument(self, **kwargs):
338
- """
339
- DESCRIPTION:
340
- Function to process other arguments. This function does the following:
341
- * Checks the required arguments are passed or not.
342
- * Checks the type of the arguments are expected or not.
343
- * If argument accepts only specified values, function checks whether
344
- the value passed is in the specified values or not.
345
- * If all the checks pass, it then populates the corresponding lists
346
- with respective values.
347
-
348
- PARAMETERS:
349
- kwargs:
350
- Specifies the keyword arguments passed to a function.
351
-
352
- RETURNS:
353
- None.
354
-
355
- RAISES:
356
- ValueError OR TypeError OR TeradataMlException.
357
-
358
- EXAMPLES:
359
- self._process_other_arguments(arg1="string", arg2="db", arg3=2)
360
- """
361
- sequence_input_by_list = []
362
-
363
- # Before populating the corresponding lists, make sure to empty those so
364
- # duplicates won't be populated even if analytic function execution happens twice.
365
- self._func_other_arg_sql_names = []
366
- self._func_other_args = []
367
- self._func_other_arg_json_datatypes = []
368
-
369
- # Let's process formula argument.
370
- if len(self._metadata.formula_args) > 0:
371
- formula = kwargs.pop("formula", None)
372
-
373
- # If formula is passed, process formula argument,
374
- # else process components of formula individually as a
375
- # part of normal function argument processing.
376
- formula_comp_provided = False
377
- formula_comp_args = []
378
- if formula is not None:
379
- _Validators._validate_function_arguments([["formula", formula, True, str, True]])
380
- input_data = kwargs.get(self._metadata.formula_args[0].get_target_table_lang_name())
381
- formula_obj = AnalyticsWrapperUtils()._validate_formula_notation(formula, input_data, "formula")
382
-
383
- for formula_arg_component in self._metadata.formula_args:
384
- # Check if this formula argument component is separately provided
385
- # along with 'formula'. If so, raise error.
386
- formula_arg_value = kwargs.get(formula_arg_component.get_lang_name(), None)
387
- formula_comp_args.append(formula_arg_component.get_lang_name())
388
- if formula_arg_value is not None or formula_comp_provided:
389
- formula_comp_provided = True
390
- elif formula is not None:
391
- # Processing dependent component of formula.
392
- if formula_arg_component.get_r_order_number() == 0:
393
- __response_column = formula_obj._get_dependent_vars()
394
- if len(__response_column) > 0:
395
- kwargs[formula_arg_component.get_lang_name()] = __response_column
396
-
397
- # Processing non-dependent components of formula.
398
- # Non-dependent components of formula can consist columns of either all, numeric
399
- # or categorical type.
400
- else:
401
- if formula_arg_component.get_r_order_number() == -1:
402
- allowed_types_list = formula_arg_component.get_allowed_type_groups()
403
- json_to_python_type_map = {"NUMERIC": "numerical",
404
- "NUMERICAL": "numerical"
405
- }
406
- col_data_type = json_to_python_type_map.get(allowed_types_list[0], "all")
407
- elif formula_arg_component.get_r_order_number() == -2:
408
- col_data_type = "categorical"
409
-
410
- __columns = AnalyticsWrapperUtils()._get_columns_by_type(formula_obj,
411
- input_data,
412
- col_data_type)
413
- if len(__columns) > 0:
414
- kwargs[formula_arg_component.get_lang_name()] = __columns
415
- # Pass dummy value to validator if any of the formula component argument is provided.
416
- # Else pass None.
417
- _Validators._validate_mutually_exclusive_arguments(formula, "formula",
418
- 1 if formula_comp_provided else None,
419
- formula_comp_args)
420
-
421
- # Let's process all other arguments.
422
- for argument in self._metadata.arguments:
423
- sql_name = argument.get_sql_name()
424
- arg_name = argument.get_lang_name()
425
- arg_value = kwargs.get(arg_name)
426
- # Set the "argument".
427
- self._spl_func_obj.set_arg_name(argument)
428
- # Let's get spl handler if function requires.
429
- special_case_handler = self._spl_func_obj._get_handle()
430
-
431
- self._validate_analytic_function_argument(arg_name, arg_value, argument)
432
-
433
- # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
434
- # Check lower bound and upper bound for number type of arguments.
435
- if isinstance(arg_value, (int, float)):
436
-
437
- lower_bound_inclusive = argument.get_lower_bound_type() == "INCLUSIVE"
438
- upper_bound_inclusive = argument.get_upper_bound_type() == "INCLUSIVE"
439
- _Validators._validate_argument_range(arg_value,
440
- arg_name,
441
- lbound=argument.get_lower_bound(),
442
- ubound=argument.get_upper_bound(),
443
- lbound_inclusive=lower_bound_inclusive,
444
- ubound_inclusive=upper_bound_inclusive)
445
-
446
- if argument.is_column_argument() and not argument.get_target_table():
447
- raise TeradataMlException(
448
- Messages.get_message(MessageCodes.INVALID_JSON, "{}.json".format(self._metadata.sql_function_name),
449
- "Argument '{}' is specified as column argument but "
450
- "is Target table is not specified".format(sql_name)), MessageCodes.INVALID_JSON)
451
-
452
- # Additional Validations if argument is a Column name.
453
- if argument.is_column_argument() and argument.get_target_table():
454
- target_table_argument_name = argument.get_target_table_lang_name()
455
- dataframe = kwargs.get(target_table_argument_name)
456
- # Input table can be an object of MLE Functions too.
457
- if not self._is_argument_dataframe(dataframe) and dataframe is not None:
458
- dataframe = dataframe._mlresults[0]
459
-
460
- # Validate column is existed or not in the table.
461
- _Validators._validate_dataframe_has_argument_columns(
462
- arg_value, arg_name, dataframe, target_table_argument_name)
463
-
464
- # Append square brackets for column range when function
465
- # does not require special case handler.
466
- arg_value = self._spl_func_obj._add_square_bracket(arg_value)
467
-
468
- # SequenceInputBy arguments require special processing.
469
- if 500 <= argument.get_r_order_number() <= 510:
470
- quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
471
- sequence_input_by_list.append("{}:{}".format(sql_name, quoted_value))
472
- continue
473
-
474
- if arg_value is not None and arg_value != argument.get_default_value():
475
-
476
- # Specific type of arguments required to be passed in a single quote.
477
- # Append quote if argument requires it.
478
-
479
- # Handle special cases for arg_values based on function handler.
480
- arg_value = special_case_handler(arg_value, self._quote_collapse_other_args) \
481
- if special_case_handler is not None \
482
- else self._quote_collapse_other_args(argument, arg_value)
483
-
484
- self._func_other_arg_sql_names.append(sql_name)
485
- self._func_other_args.append(arg_value)
486
- self._func_other_arg_json_datatypes.append(argument.get_data_type())
487
-
488
- if sequence_input_by_list:
489
- self._func_other_arg_sql_names.append("SequenceInputBy")
490
- sequence_input_by_arg_value = UtilFuncs._teradata_collapse_arglist(sequence_input_by_list, "'")
491
- self._func_other_args.append(sequence_input_by_arg_value)
492
- self._func_other_arg_json_datatypes.append("STRING")
493
- self._sql_specific_attributes["SequenceInputBy"] = sequence_input_by_arg_value
494
-
495
- def _create_dynamic_class(self):
496
- """
497
- DESCRIPTION:
498
- Function dynamically creates a class with name as analytic function name.
499
-
500
- RETURNS:
501
- class
502
-
503
- RAISES:
504
- None.
505
-
506
- EXAMPLE:
507
- self._create_dynamic_class()
508
- """
509
- # Constructor for the dynamic class.
510
- def constructor(self):
511
- """ Constructor for dynamic class """
512
- # Do Nothing...
513
- pass
514
-
515
- _function_output_table_map = self._function_output_table_map
516
- # __repr__ method for dynamic class.
517
- # Note that the self represents the dynamic class object. Not the
518
- # instance of _AnlyticFunctionExecutor. So, DataFrames will be available as
519
- # attributes of the object, which is created using dynamic class.
520
- def print_result(self):
521
- """ Function to be used for representation of InDB function type object. """
522
- repr_string = ""
523
- for key in _function_output_table_map:
524
- repr_string = "{}\n############ {} Output ############".format(repr_string, key)
525
- repr_string = "{}\n\n{}\n\n".format(repr_string, getattr(self,key))
526
- return repr_string
527
- self._dyn_cls_data_members["__repr__"] = print_result
528
-
529
- query = self.sqlmr_query
530
- build_time = None if self.__build_time is None else round(self.__build_time, 2)
531
-
532
- self._dyn_cls_data_members["show_query"] = lambda x: query
533
- self._dyn_cls_data_members["get_build_time"] = lambda x: build_time
534
-
535
- # To list attributes using dict()
536
- self._dyn_cls_data_members["__dict__"] = self._dyn_cls_data_members
537
- self._dyn_cls_data_members["_mlresults"] = self._mlresults
538
-
539
- # Dynamic class creation with In-DB function name.
540
- indb_class = type(self.func_name, (object,), self._dyn_cls_data_members)
541
-
542
- return indb_class()
543
-
544
- def _generate_query(self):
545
- """
546
- DESCRIPTION:
547
- An interface, which should be implemented by child class(es) to generate the
548
- query for analytic function.
549
-
550
- RETURNS:
551
- None
552
-
553
- RAISES:
554
- None.
555
-
556
- EXAMPLE:
557
- self._generate_query()
558
- """
559
- raise NotImplementedError("Function should be implemented in child class.")
560
-
561
- def _process_input_argument(self, **kwargs):
562
- """
563
- DESCRIPTION:
564
- An interface, which should be implemented by child class(es) to
565
- process input argument(s).
566
-
567
- PARAMETERS:
568
- kwargs:
569
- Specifies the keyword arguments passed to a function.
570
-
571
- RETURNS:
572
- None.
573
-
574
- RAISES:
575
- None.
576
-
577
- EXAMPLES:
578
- self._process_input_argument()
579
- """
580
- raise NotImplementedError("Function should be implemented in child class.")
581
-
582
- def _process_function_output(self, **kwargs):
583
- """
584
- DESCRIPTION:
585
- An interface, which should be implemented by child class(es) to
586
- process the output.
587
-
588
- PARAMETERS:
589
- kwargs:
590
- Specifies the keyword arguments passed to a function.
591
-
592
- RETURNS:
593
- None.
594
-
595
- RAISES:
596
- None.
597
-
598
- EXAMPLES:
599
- self._process_function_output()
600
- """
601
- raise NotImplementedError("Function should be implemented in child class.")
602
-
603
- def _execute_function(self,
604
- skip_input_arg_processing=False,
605
- skip_output_arg_processing=False,
606
- skip_other_arg_processing=False,
607
- skip_func_output_processing=False,
608
- skip_dyn_cls_processing=False,
609
- **kwargs):
610
- """
611
- DESCRIPTION:
612
- Function processes arguments and executes the analytic function.
613
-
614
- PARAMETERS:
615
- skip_input_arg_processing:
616
- Optional Argument.
617
- Specifies whether to skip input (data) argument processing or not.
618
- Default is to process the input (data) argument.
619
- When set to True, caller should make sure to process "input" argument and
620
- pass SQL argument and values as part of kwargs to this function.
621
- Default Value: False
622
- Types: bool
623
-
624
- skip_output_arg_processing:
625
- Optional Argument.
626
- Specifies whether to skip output argument processing or not.
627
- Default is to process the output arguments.
628
- When set to True, caller should make sure to process all output arguments and
629
- pass equivalent SQL argument and values as part of kwargs to this function.
630
- Default Value: False
631
- Types: bool
632
-
633
- skip_other_arg_processing:
634
- Optional Argument.
635
- Specifies whether to skip other argument processing or not.
636
- Default is to process the other arguments, i.e., kwargs.
637
- When set to True, caller should make sure to process all other arguments are
638
- processed internally by the function.
639
- Default Value: False
640
- Types: bool
641
-
642
- skip_func_output_processing:
643
- Optional Argument.
644
- Specifies whether to skip function output processing or not.
645
- Default is to process the same.
646
- When set to True, caller should make sure to process function output.
647
- Generally, when this argument is set to True, one must also
648
- set "skip_dyn_cls_processing" to True.
649
- Default Value: False
650
- Types: bool
651
-
652
- skip_dyn_cls_processing:
653
- Optional Argument.
654
- Specifies whether to skip dynamic class processing or not.
655
- Default is to process the dynamic class, where it creates a dynamic
656
- class and an instance of the same and returns the same.
657
- When set to True, caller should make sure to process dynamic class and
658
- return an instance of the same.
659
- Default Value: False
660
- Types: bool
661
-
662
- kwargs:
663
- Specifies the keyword arguments passed to a function.
664
-
665
- RETURNS:
666
- An object of class, with the name same as analytic function.
667
-
668
- RAISES:
669
- TeradataMlException, TypeError
670
-
671
- EXAMPLES:
672
- self._execute_function(arg1="string", arg2="db", arg3=2)
673
- """
674
- # kwargs may contain all additional arguments in 'generic_arguments'.
675
- # Hence update it to actual kwargs.
676
- kwargs.update(kwargs.pop("generic_arguments", {}))
677
-
678
- # Add all arguments to dynamic class as data members.
679
-
680
- start_time = time.time()
681
- persist = kwargs.get("persist", False)
682
- volatile = kwargs.get("volatile", False)
683
-
684
- # Validate local_order_column argument type and values.
685
- arg_info_matrix = [["persist", persist, True, bool], ["volatile", volatile, True, bool]]
686
- # Check for valid types and values.
687
- _Validators._validate_function_arguments(arg_info_matrix)
688
-
689
- if persist and volatile:
690
- raise TeradataMlException(
691
- Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH, "persist", "volatile"),
692
- MessageCodes.CANNOT_USE_TOGETHER_WITH)
693
-
694
- self._dyn_cls_data_members.update(kwargs)
695
-
696
- if not skip_input_arg_processing:
697
- self._process_input_argument(**kwargs)
698
-
699
- if not skip_output_arg_processing:
700
- self._process_output_argument(**kwargs)
701
-
702
- if not skip_other_arg_processing:
703
- self._process_other_argument(**kwargs)
704
-
705
- self._generate_query(volatile=volatile)
706
-
707
- # Print SQL-MR query if requested to do so.
708
- if display.print_sqlmr_query:
709
- print(self.sqlmr_query)
710
-
711
- self._execute_query(persist, volatile)
712
-
713
- if not skip_func_output_processing:
714
- self._process_function_output(**kwargs)
715
-
716
- # Set the build time.
717
- self.__build_time = time.time() - start_time
718
-
719
- if not skip_dyn_cls_processing:
720
- return self._create_dynamic_class()
721
-
722
- def _quote_collapse_other_args(self, argument, arg_value):
723
- """
724
- DESCRIPTION:
725
- Given a list as an argument this will single quote all the
726
- list elements and combine them into a single string separated by
727
- commas.
728
- Append single quote to the elements which are required to be quoted.
729
-
730
- PARAMETERS:
731
- argument:
732
- Required Argument.
733
- Specifies the argument object (_AnlyArgumentBase).
734
- Types: _AnlyFuncArgument
735
-
736
- arg_value:
737
- Required Argument.
738
- Specifies the arg_value to be quoted and combined.
739
- Types: list OR string OR int OR bool OR float
740
-
741
- RETURNS:
742
- None
743
-
744
- RAISES:
745
- None
746
-
747
- EXAMPLES:
748
- self._quote_collapse_other_args(argument, arg_value)
749
- """
750
- if isinstance(argument.get_data_type(), list):
751
- if isinstance(arg_value, (str, bool)):
752
- return UtilFuncs._teradata_collapse_arglist(arg_value, "'")
753
- else:
754
- return UtilFuncs._teradata_collapse_arglist(arg_value, "")
755
- else:
756
- if (argument.get_data_type().lower() in ("column", "columns", "column_names", "string", "boolean")):
757
- return UtilFuncs._teradata_collapse_arglist(arg_value, "'")
758
- else:
759
- return UtilFuncs._teradata_collapse_arglist(arg_value, "")
760
-
761
- class _SQLEFunctionExecutor(_AnlyticFunctionExecutor):
762
- """ Class to hold the attributes and provide methods to enable function execution. """
763
- def __init__(self, func_name, func_type=TeradataAnalyticFunctionTypes.SQLE.value):
764
- """
765
- DESCRIPTION:
766
- Constructor for the class.
767
-
768
- PARAMETERS:
769
- func_name:
770
- Required Argument.
771
- Specifies the name of the analytic function, which is exposed to user.
772
- Types: str
773
-
774
- func_type:
775
- Optional Argument.
776
- Specifies the type of the analytic function.
777
- Types: str
778
-
779
- RAISES:
780
- TypeError OR ValueError OR TeradataMlException
781
-
782
- EXAMPLES:
783
- _SQLEFunctionExecutor("AdaBoost")
784
- """
785
- super().__init__(func_name, func_type)
786
-
787
- # Lists to hold Input Argument (Table) Information
788
- self._func_input_arg_sql_names = []
789
- self._func_input_table_view_query = []
790
- self._func_input_dataframe_type = []
791
- self._func_input_distribution = []
792
- self._func_input_partition_by_cols = []
793
- self._func_input_order_by_cols = []
794
- self._func_input_local_order = []
795
-
796
- def _generate_query(self, volatile=False):
797
- """
798
- DESCRIPTION:
799
- Function to generate the SQL query for SQLE analytic function.
800
-
801
- PARAMETERS:
802
- volatile:
803
- Optional Argument.
804
- Specifies whether to create a volatile table or not.
805
- Default Value: False
806
- Type: bool
807
-
808
- RETURNS:
809
- None.
810
-
811
- RAISES:
812
- None.
813
-
814
- EXAMPLES:
815
- self._generate_query()
816
- """
817
-
818
- self.__aqg_obj = AnalyticQueryGenerator(function_name=self._metadata.sql_function_name,
819
- func_input_arg_sql_names=self._func_input_arg_sql_names,
820
- func_input_table_view_query=self._func_input_table_view_query,
821
- func_input_dataframe_type=self._func_input_dataframe_type,
822
- func_input_distribution=self._func_input_distribution,
823
- func_input_partition_by_cols=self._func_input_partition_by_cols,
824
- func_input_order_by_cols=self._func_input_order_by_cols,
825
- func_other_arg_sql_names=self._func_other_arg_sql_names,
826
- func_other_args_values=self._func_other_args,
827
- func_other_arg_json_datatypes=self._func_other_arg_json_datatypes,
828
- func_output_args_sql_names=self._func_output_args_sql_names,
829
- func_output_args_values=self._func_output_args,
830
- engine="ENGINE_SQL",
831
- volatile_output=volatile,
832
- skip_config_lookup=True,
833
- func_input_local_order=self._func_input_local_order)
834
-
835
- # Invoke call to SQL-MR generation.
836
- self.sqlmr_query = self.__aqg_obj._gen_sqlmr_select_stmt_sql()
837
-
838
- def _get_input_args(self, **kwargs):
839
- """
840
- DESCRIPTION:
841
- Function to get input argument(s).
842
-
843
- PARAMETERS:
844
- kwargs:
845
- Specifies the keyword arguments passed to a function.
846
-
847
- RETURNS:
848
- tuple, element1 represents input DataFrame argument name and
849
- second element represents the Input Argument metadata.
850
-
851
- RAISES:
852
- None.
853
-
854
- EXAMPLES:
855
- self._get_input_args()
856
- """
857
- for _inp_attribute in self._metadata.input_tables:
858
- input_table_arg = _inp_attribute.get_lang_name()
859
- yield input_table_arg, _inp_attribute
860
-
861
- # Check if SQLE function allows multiple values as input.
862
- if _inp_attribute.allows_lists():
863
- _index = 1
864
- while True:
865
- _input_table_arg = "{}{}".format(input_table_arg, _index)
866
- # If the corresponding object is available in kwargs, then extract it.
867
- # Otherwise, stop looking for multiple arguments and proceed for next attribute.
868
- if _input_table_arg in kwargs:
869
- yield _input_table_arg, _inp_attribute
870
- _index = _index + 1
871
- else:
872
- break
873
-
874
- def _process_input_argument(self, **kwargs):
875
- """
876
- DESCRIPTION:
877
- Function to process input argument(s).
878
-
879
- PARAMETERS:
880
- kwargs:
881
- Specifies the keyword arguments passed to a function.
882
-
883
- RETURNS:
884
- None.
885
-
886
- RAISES:
887
- None.
888
-
889
- EXAMPLES:
890
- self._process_input_argument()
891
- """
892
- for input_table_arg, input_attribute in self._get_input_args(**kwargs):
893
- partition_column_arg = "{}_partition_column".format(input_table_arg)
894
- order_column_arg = "{}_order_column".format(input_table_arg)
895
- local_order_column_arg = "local_order_{}".format(input_table_arg)
896
- hash_column_arg = "{}_hash_column".format(input_table_arg)
897
-
898
- # Get the argument values from kwargs
899
- input_table_arg_value = kwargs.get(input_table_arg)
900
- partition_column_arg_value = kwargs.get(partition_column_arg)
901
- order_column_arg_value = kwargs.get(order_column_arg)
902
- local_order_column_arg_value = kwargs.get(local_order_column_arg, False)
903
- hash_column_arg_value = kwargs.get(hash_column_arg)
904
-
905
- reference_class = None
906
- if input_attribute.is_reference_function_acceptable():
907
- reference_class = self._metadata.get_reference_function_class()
908
-
909
- # Validate the input table arguments.
910
- self._validate_analytic_function_argument(
911
- input_table_arg, input_table_arg_value, input_attribute, additional_valid_types=reference_class)
912
-
913
- # If input is an object of reference Function, then get the DataFrame from it.
914
- if reference_class and isinstance(input_table_arg_value, reference_class):
915
- input_table_arg_value = input_table_arg_value._mlresults[0]
916
- # Don't fill the input lists if the value is None.
917
- if input_table_arg_value is None:
918
- continue
919
-
920
- # Validate local_order_column argument type and values.
921
- arg_info_matrix = [[local_order_column_arg, local_order_column_arg_value, True, bool, True]]
922
- # Check emptiness and types.
923
- _Validators._validate_missing_required_arguments(arg_info_matrix)
924
- _Validators._validate_function_arguments(arg_info_matrix)
925
-
926
- for arg in [partition_column_arg, order_column_arg, hash_column_arg]:
927
- arg_value = kwargs.get(arg)
928
-
929
- expected_types = (str, list)
930
- # For partition column, user can pass partition kind too.
931
- if "partition" in arg:
932
- expected_types = (str, _ListOf(str), PartitionKind)
933
- arg_info_matrix = [[arg, arg_value, True, expected_types, True]]
934
-
935
- # Check for empty string and datatype.
936
- _Validators._validate_missing_required_arguments(arg_info_matrix)
937
- _Validators._validate_function_arguments(arg_info_matrix)
938
-
939
- # Set order column value to "NA_character_" if it is None.
940
- if not isinstance(arg_value, PartitionKind):
941
- # Validate column existence in DataFrame only if user inputs a column(s).
942
- _Validators._validate_dataframe_has_argument_columns(arg_value,
943
- arg,
944
- input_table_arg_value,
945
- input_table_arg
946
- )
947
-
948
- order_column_arg_value = UtilFuncs._teradata_collapse_arglist(order_column_arg_value, "\"")
949
-
950
- distribution, partition_column = self._get_distribution_and_partition_column(
951
- partition_column_arg_value, hash_column_arg_value, input_attribute)
952
-
953
- table_ref = AnalyticsWrapperUtils()._teradata_on_clause_from_dataframe(
954
- input_table_arg_value, False)
955
-
956
- self._func_input_arg_sql_names.append(input_attribute.get_sql_name())
957
- self._func_input_table_view_query.append(table_ref["ref"])
958
- self._func_input_dataframe_type.append(table_ref["ref_type"])
959
- self._func_input_order_by_cols.append(order_column_arg_value)
960
- self._func_input_distribution.append(distribution)
961
- self._func_input_partition_by_cols.append(partition_column)
962
- self._func_input_local_order.append(local_order_column_arg_value)
963
-
964
- def _get_distribution_and_partition_column(self,
965
- partition_column_arg_value,
966
- hash_column_arg_value,
967
- input_attribute):
968
- """
969
- DESCRIPTION:
970
- Function to get the input distribution and partition column values to
971
- process input table argument.
972
-
973
- PARAMETERS:
974
- partition_column_arg_value:
975
- Required Argument.
976
- Specifies the partition column argument value.
977
- Types: str OR PartitionKind OR None.
978
-
979
- hash_column_arg_value:
980
- Required Argument.
981
- Specifies the hash column argument value.
982
- Types: str
983
-
984
- input_attribute:
985
- Required Argument.
986
- Specifies the input table attribute.
987
- Types: _AnlyFuncInput
988
-
989
- RETURNS:
990
- tuple, with first element represents distribution and second element
991
- represents partition_column.
992
-
993
- RAISES:
994
- None.
995
-
996
- EXAMPLES:
997
- self._get_distribution_and_partition_column(partition_column_arg_val, hash_column_arg_val)
998
- """
999
- # If user passes hash_column_argument, generate the Query based on HASH BY
1000
- # irrespective of the value of partition_column.
1001
- if hash_column_arg_value:
1002
- return "HASH", UtilFuncs._teradata_collapse_arglist(hash_column_arg_value, "\"")
1003
-
1004
- # If user passes PartitionKind, generate Query based on distribution and partition type.
1005
- if isinstance(partition_column_arg_value, PartitionKind):
1006
- return self.__get_dist_partition_column_from_partition_kind(partition_column_arg_value)
1007
-
1008
- # If user pass a string or list of strings for partition_column, generate PARTITION BY
1009
- # based on partition column value.
1010
- if partition_column_arg_value is not None:
1011
- return "FACT", UtilFuncs._teradata_collapse_arglist(partition_column_arg_value, "\"")
1012
- # No partition_column is sourced to input. So, derive the default one.
1013
- else:
1014
- default = input_attribute._get_default_partition_column_kind()
1015
- return self.__get_dist_partition_column_from_partition_kind(default)
1016
-
1017
- def __get_dist_partition_column_from_partition_kind(self, partition_kind):
1018
- """
1019
- DESCRIPTION:
1020
- Function to get the distribution and partition column based on PartitionKind.
1021
-
1022
- PARAMETERS:
1023
- partition_kind:
1024
- Required Argument.
1025
- Specifies the type of Partition.
1026
- Types: PartitionKind
1027
-
1028
- RETURNS:
1029
- tuple, with first element represents distribution and second element
1030
- represents partition_column.
1031
-
1032
- RAISES:
1033
- None.
1034
-
1035
- EXAMPLES:
1036
- self.__get_dist_partition_column_from_partition_kind(PartitionKind.ONE)
1037
- """
1038
- if partition_kind in (PartitionKind.ANY, PartitionKind.ONE):
1039
- return "FACT", partition_kind.value
1040
- elif partition_kind == PartitionKind.DIMENSION:
1041
- return PartitionKind.DIMENSION.value, None
1042
- # Else is for PartitionKind.NONE.
1043
- else:
1044
- return "NONE", "NA_character_"
1045
-
1046
- # Below code is not being used. Kept here to refer again.
1047
- '''
1048
- def _get_input_distribution_and_partition_column(self, input_table, partition_column_arg_value):
1049
- """
1050
- DESCRIPTION:
1051
- Function to get the input distribution and partition column values to
1052
- process input table argument.
1053
-
1054
- PARAMETERS:
1055
- input_table:
1056
- Required Argument.
1057
- Specifies the input table argument.
1058
- Types: _AnlyFuncInput
1059
-
1060
- partition_column_arg_value:
1061
- Required Argument.
1062
- Specifies the partition column argument value.
1063
- Types: str
1064
-
1065
- RETURNS:
1066
- tuple, with first element represents distribution and second element
1067
- represents partition_column.
1068
-
1069
- RAISES:
1070
- None.
1071
-
1072
- EXAMPLES:
1073
- self._get_input_distribution_and_partition_column(inp1, partition_column_arg)
1074
- """
1075
- # Initialise all the temporary variables and set those to False by default.
1076
- is_dimension, is_partition_by_key, is_partition_by_any, is_partition_by_one = [False] * 4
1077
- is_partition_by_one_only, is_partition_by_any_only = [False] * 2
1078
-
1079
- # Get the partition kind from input table.
1080
- partition_kind = input_table._get_partition_column_required_kind()
1081
-
1082
- # Check whether associated input table requires to be partitioned
1083
- # on any column or not.
1084
- # Set some booleans based on what type of distribution is supported by
1085
- # the argument.
1086
- if partition_kind == PartitionKind.DIMENSION:
1087
- is_dimension = True
1088
- elif partition_kind == PartitionKind.DIMENSIONKEY:
1089
- is_dimension, is_partition_by_key = True, True
1090
- elif partition_kind == PartitionKind.DIMENSIONKEYANY:
1091
- is_dimension, is_partition_by_any, is_partition_by_key = True, True, True
1092
- elif partition_kind == PartitionKind.KEY:
1093
- is_partition_by_key = True
1094
- elif partition_kind == PartitionKind.ONE:
1095
- is_partition_by_one, is_partition_by_key = True, True
1096
- elif partition_kind == PartitionKind.ANY:
1097
- is_partition_by_any, is_partition_by_key = True, True
1098
- elif partition_kind == PartitionKind.ANYONLY:
1099
- is_partition_by_any_only = True
1100
- elif partition_kind == PartitionKind.ONEONLY:
1101
- is_partition_by_one_only = True
1102
-
1103
- collapse_arg_list = lambda partition_column_arg_value: "NA_character_" if partition_column_arg_value is None\
1104
- else UtilFuncs._teradata_collapse_arglist(partition_column_arg_value, "\"")
1105
-
1106
- default_partition_value = input_table._get_default_partition_by_value(partition_kind)
1107
-
1108
- # When distribution is of type dimension, no partition by column required.
1109
- if is_dimension and not is_partition_by_key and not is_partition_by_any:
1110
- distribution = "DIMENSION"
1111
- partition_column = "NA_character_"
1112
- # When partitioned by either key or any, distribution should be FACT.
1113
- elif is_dimension and (is_partition_by_key or is_partition_by_any):
1114
- # If the input is not None, then distribution should be FACT. Otherwise, DIMENSION.
1115
- distribution = "DIMENSION"
1116
- if (partition_column_arg_value is not None and is_partition_by_key):
1117
- distribution = "FACT"
1118
-
1119
- # Quote if input value is not same as default value.
1120
- if self._awu._is_default_or_not(partition_column_arg_value, default_partition_value):
1121
- partition_column = collapse_arg_list(partition_column_arg_value)
1122
- else:
1123
- partition_column = default_partition_value
1124
-
1125
- elif partition_column_arg_value is not None and not is_partition_by_key and is_partition_by_any:
1126
- distribution = "FACT"
1127
- partition_column = "ANY"
1128
- else:
1129
- partition_column = "NA_character_"
1130
- else:
1131
- # When partitioned by either key or any, distribution should be FACT.
1132
- if is_partition_by_any and not is_partition_by_key:
1133
- distribution = "FACT"
1134
- partition_column = "ANY"
1135
- elif (is_partition_by_key and not is_partition_by_any and not is_partition_by_one) or\
1136
- (is_partition_by_key and is_partition_by_any):
1137
- distribution = "FACT"
1138
- # If partition value is default value, Enclose it with double quotes.
1139
- if default_partition_value is not None or default_partition_value != "":
1140
- if self._awu._is_default_or_not(partition_column_arg_value, default_partition_value):
1141
- partition_column = collapse_arg_list(partition_column_arg_value)
1142
- else:
1143
- partition_column = default_partition_value
1144
- else:
1145
- partition_column = UtilFuncs._teradata_collapse_arglist(partition_column_arg_value, "\"")
1146
- elif is_partition_by_one:
1147
- distribution = "FACT"
1148
- # If partition value is 1, Enclose it with double quotes.
1149
- if self._awu._is_default_or_not(partition_column_arg_value, "1"):
1150
- partition_column = collapse_arg_list(partition_column_arg_value)
1151
- else:
1152
- partition_column = default_partition_value
1153
- elif is_partition_by_any_only or is_partition_by_one_only:
1154
- distribution = "FACT"
1155
- partition_column = "{}".format(default_partition_value)
1156
- else:
1157
- distribution = "NONE"
1158
- partition_column = "NA_character_"
1159
-
1160
- return distribution, partition_column
1161
- '''
1162
-
1163
- def _process_function_output(self, **kwargs):
1164
- """
1165
- DESCRIPTION:
1166
- Internal function to process the output tables. This function creates
1167
- the required output DataFrames from the tables and a result list.
1168
-
1169
- PARAMETERS:
1170
- kwargs:
1171
- Specifies the keyword arguments passed to a function.
1172
-
1173
- RETURNS:
1174
- None.
1175
-
1176
- RAISES:
1177
- None.
1178
-
1179
- EXAMPLES:
1180
- self._process_function_output()
1181
- """
1182
- for lang_name, table_name in self._function_output_table_map.items():
1183
- out_table_name = UtilFuncs._extract_table_name(table_name)
1184
- out_db_name = UtilFuncs._extract_db_name(table_name)
1185
- df = self._awu._create_data_set_object(
1186
- df_input=out_table_name, database_name=out_db_name, source_type="table")
1187
- self._dyn_cls_data_members[lang_name] = df
1188
- # Condition make sure that the first element always be result or output in _mlresults.
1189
- if lang_name in ["output", "result"]:
1190
- self._mlresults.insert(0, df)
1191
- else:
1192
- self._mlresults.append(df)
1193
-
1194
- class _TableOperatorExecutor(_SQLEFunctionExecutor):
1195
- """ Class to hold the attributes and provide methods to enable execution for Table Operators. """
1196
- def __init__(self, func_name):
1197
- """
1198
- DESCRIPTION:
1199
- Constructor for the class.
1200
-
1201
- PARAMETERS:
1202
- func_name:
1203
- Required Argument.
1204
- Specifies the name of the analytic function, which is exposed to the user.
1205
- Types: str
1206
-
1207
- RAISES:
1208
- TypeError OR ValueError OR TeradataMlException
1209
-
1210
- EXAMPLES:
1211
- _TableOperatorExecutor("write_nos")
1212
- """
1213
- super().__init__(func_name, TeradataAnalyticFunctionTypes.TABLEOPERATOR.value)
1214
-
1215
- # Lists to hold Input Argument (Table) Information
1216
- self.__func_input_order_by_type = []
1217
- self.__func_input_sort_ascending = []
1218
- self.__func_input_nulls_first = []
1219
-
1220
- def _generate_query(self, **kwargs):
1221
- """
1222
- DESCRIPTION:
1223
- Function to generate the SQL query for TABLE OPERATOR function.
1224
-
1225
- RETURNS:
1226
- None.
1227
-
1228
- RAISES:
1229
- None.
1230
-
1231
- EXAMPLES:
1232
- self._generate_query()
1233
- """
1234
- self.__aqg_obj = TableOperatorQueryGenerator(function_name=self.func_name,
1235
- func_input_arg_sql_names=self._func_input_arg_sql_names,
1236
- func_input_table_view_query=self._func_input_table_view_query,
1237
- func_input_dataframe_type=self._func_input_dataframe_type,
1238
- func_input_distribution=self._func_input_distribution,
1239
- func_input_partition_by_cols=self._func_input_partition_by_cols,
1240
- func_input_order_by_cols=self._func_input_order_by_cols,
1241
- func_other_arg_sql_names=self._func_other_arg_sql_names,
1242
- func_other_args_values=self._func_other_args,
1243
- func_other_arg_json_datatypes=self._func_other_arg_json_datatypes,
1244
- func_output_args_sql_names=self._func_output_args_sql_names,
1245
- func_output_args_values=self._func_output_args,
1246
- func_input_order_by_type=self.__func_input_order_by_type,
1247
- func_input_sort_ascending=self.__func_input_sort_ascending,
1248
- func_input_nulls_first=self.__func_input_nulls_first,
1249
- engine="ENGINE_SQL")
1250
-
1251
- # Invoke call to SQL-MR generation.
1252
- self.sqlmr_query = self.__aqg_obj._gen_table_operator_select_stmt_sql()
1253
-
1254
- def _process_input_argument(self, **kwargs):
1255
- """
1256
- DESCRIPTION:
1257
- Function to process input argument(s).
1258
-
1259
- PARAMETERS:
1260
- kwargs:
1261
- Specifies the keyword arguments passed to a function.
1262
-
1263
- RETURNS:
1264
- None.
1265
-
1266
- RAISES:
1267
- None.
1268
-
1269
- EXAMPLES:
1270
- self._process_input_argument()
1271
- """
1272
- super()._process_input_argument(**kwargs)
1273
- # Iterating over multiple input arguments if present.
1274
- for index, input_attribute in enumerate(self._metadata.input_tables):
1275
- # Extracting input argument name and value.
1276
- input_table_arg = input_attribute.get_lang_name()
1277
- input_table_arg_value = kwargs.get(input_table_arg)
1278
- # No need to process further if no input argument.
1279
- # Validation of this input is done in the parent class.
1280
- if input_table_arg_value is None:
1281
- continue
1282
-
1283
- # Extracting argument names for partition, hash and is local ordered.
1284
- partition_column_arg = "{}_partition_column".format(input_table_arg)
1285
- hash_column_arg = "{}_hash_column".format(input_table_arg)
1286
- is_local_ordered_arg = "local_order_{}".format(input_table_arg)
1287
- order_column_arg = "{}_order_column".format(input_table_arg)
1288
- # Extracting argument values for partition, hash and is local ordered.
1289
- partition_column_value = kwargs.get(partition_column_arg, None)
1290
- hash_column_value = kwargs.get(hash_column_arg, None)
1291
- is_local_ordered_value = kwargs.get(is_local_ordered_arg, False)
1292
- order_column_value = kwargs.get(order_column_arg, "NA_character_")
1293
-
1294
- self._validate_hash_local_ordered_arguments(partition_column_arg, partition_column_value,
1295
- hash_column_arg, hash_column_value,
1296
- is_local_ordered_arg, is_local_ordered_value,
1297
- order_column_arg, order_column_value,
1298
- input_table_arg, input_table_arg_value)
1299
-
1300
- if is_local_ordered_value:
1301
- self.__func_input_order_by_type.append("LOCAL")
1302
- if hash_column_value is None:
1303
- self._func_input_distribution[index] = "NONE"
1304
- else:
1305
- self._func_input_distribution[index] = "HASH"
1306
- self._func_input_partition_by_cols[index] = hash_column_value
1307
- else:
1308
- self.__func_input_order_by_type.append(None)
1309
- if partition_column_value is None:
1310
- self._func_input_distribution[index] = "NONE"
1311
-
1312
- def _validate_hash_local_ordered_arguments(self, partition_column_arg, partition_column_value,
1313
- hash_column_arg, hash_column_value,
1314
- is_local_ordered_arg, is_local_ordered_value,
1315
- order_column_arg, order_column_value,
1316
- input_table_arg, input_table_arg_value):
1317
- """
1318
- DESCRIPTION:
1319
- Function to validate the hash and local order function arguments. This function does
1320
- the following validations
1321
- * Check if Hash Column value is not empty string.
1322
- * Check if "is local order" value is of type boolean.
1323
- * Hash and order by can be used together as long as is_local_order = True.
1324
- * Either hash or partition can be used.
1325
- * Either local order by or partition by can be used.
1326
-
1327
- PARAMETERS:
1328
- partition_column_arg:
1329
- Required Argument.
1330
- Specifies the name of the partition by column argument.
1331
- Type: str
1332
-
1333
- partition_column_value:
1334
- Required Argument.
1335
- Specifies the value of the partition by column argument.
1336
- Type: str
1337
-
1338
- hash_column_arg:
1339
- Required Argument.
1340
- Specifies the name of the hash by column argument.
1341
- Type: str
1342
-
1343
- hash_column_value:
1344
- Required Argument.
1345
- Specifies the value of the hash by column argument.
1346
- Type: str
1347
-
1348
- is_local_ordered_arg:
1349
- Required Argument.
1350
- Specifies the name of the is local ordered argument.
1351
- Type: str
1352
-
1353
- is_local_ordered_value:
1354
- Required Argument.
1355
- Specifies the value of the is local ordered argument.
1356
- Type: bool
1357
-
1358
- order_column_arg:
1359
- Required Argument.
1360
- Specifies the name of the order by column argument.
1361
- Type: str
1362
-
1363
- order_column_value:
1364
- Required Argument.
1365
- Specifies the value of the ordere by column argument.
1366
- Type: str
1367
-
1368
- input_table_arg:
1369
- Required Argument.
1370
- Specifies the name of the input table provided to the function.
1371
- Types: str
1372
-
1373
- input_table_arg_value:
1374
- Required Argument.
1375
- Specifies the value of the input table provided to the function.
1376
- Types: DataFrame
1377
-
1378
- RETURNS:
1379
- None
1380
-
1381
- RAISES:
1382
- TeradataMlException
1383
-
1384
- EXAMPLES:
1385
- self._validate_hash_local_ordered_arguments("data", DataFrame.from_table("ibm_stock")), **kwargs)
1386
- """
1387
- # Check for empty string and types(str or list) for hash column values.
1388
- # Check for types for is local ordered values.
1389
-
1390
- _Validators._validate_function_arguments([[hash_column_arg, hash_column_value, True, (str, list), True],
1391
- [is_local_ordered_arg, is_local_ordered_value, True, bool, False]])
1392
-
1393
- # Validate column existence in DataFrame.
1394
- _Validators._validate_dataframe_has_argument_columns(hash_column_value,
1395
- hash_column_arg,
1396
- input_table_arg_value,
1397
- input_table_arg
1398
- )
1399
-
1400
- # Hash and order by can be used together as long as is_local_order = True.
1401
- if all([hash_column_value,
1402
- order_column_value]) and not is_local_ordered_value:
1403
- raise TeradataMlException(
1404
- Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH,
1405
- "{}' and '{}".format(hash_column_arg, order_column_arg),
1406
- "{}=False".format(is_local_ordered_arg)),
1407
- MessageCodes.CANNOT_USE_TOGETHER_WITH)
1408
-
1409
- # Either hash or partition can be used.
1410
- _Validators._validate_mutually_exclusive_arguments(hash_column_value,
1411
- hash_column_arg,
1412
- partition_column_value,
1413
- partition_column_arg,
1414
- skip_all_none_check=True)
1415
-
1416
- # Either local order by or partition by can be used.
1417
- _Validators._validate_mutually_exclusive_arguments(is_local_ordered_value,
1418
- is_local_ordered_arg,
1419
- partition_column_value,
1420
- partition_column_arg,
1421
- skip_all_none_check=True)
1422
-
1423
- # local order by requires column name.
1424
- if is_local_ordered_value and order_column_value is None:
1425
- message = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING,
1426
- order_column_arg, "{}=True".format(is_local_ordered_arg))
1427
- raise TeradataMlException(message, MessageCodes.DEPENDENT_ARG_MISSING)
1428
-
1429
- def _quote_collapse_other_args(self, argument, arg_value):
1430
- """
1431
- DESCRIPTION:
1432
- Given a list as an argument this will single quote all the
1433
- list elements and combine them into a single string separated by
1434
- commas.
1435
- Append single quote to the elements which are required to be quoted.
1436
-
1437
- PARAMETERS:
1438
- argument:
1439
- Required Argument.
1440
- Specifies the argument object (_AnlyArgumentBase).
1441
- Types: _AnlyFuncArgument
1442
-
1443
- arg_value:
1444
- Required Argument.
1445
- Specifies the arg_value to be quoted and combined.
1446
- Types: list OR string OR int OR bool OR float
1447
-
1448
- RETURNS:
1449
- None
1450
-
1451
- RAISES:
1452
- None
1453
-
1454
- EXAMPLES:
1455
- self._quote_collapse_other_args(argument, arg_value)
1456
- """
1457
- arg_dtype = UtilFuncs._as_list(argument.get_data_type())
1458
- for arg in arg_dtype:
1459
- if arg.lower() in ("column", "columns", "column_names", "string", "boolean") and isinstance(arg_value,(str, bool)):
1460
- return UtilFuncs._teradata_collapse_arglist(UtilFuncs._teradata_collapse_arglist(arg_value, "\'"), "'")
1461
- else:
1462
- return UtilFuncs._teradata_collapse_arglist(arg_value, "'")
1463
-
1464
- class _UAFFunctionExecutor(_SQLEFunctionExecutor):
1465
- """ Class to hold the attributes and provide methods to enable execution for UAF Functions. """
1466
- def __init__(self, func_name):
1467
- """
1468
- DESCRIPTION:
1469
- Constructor for the class.
1470
-
1471
- PARAMETERS:
1472
- func_name:
1473
- Required Argument.
1474
- Specifies the name of the analytic function, which is exposed to the user.
1475
- Types: str
1476
-
1477
- RAISES:
1478
- TypeError OR ValueError OR TeradataMlException
1479
-
1480
- EXAMPLES:
1481
- _UAFFunctionExecutor("ArimaEstimate")
1482
- """
1483
- super().__init__(func_name, TeradataAnalyticFunctionTypes.UAF.value)
1484
- self._func_other_args = {}
1485
- self._func_input_fmt_arguments = {}
1486
- self._func_output_fmt_arguments = {}
1487
-
1488
- # Lists to hold Input Argument (Table) Information
1489
- self._func_input_args = []
1490
- self._func_input_filter_expr_args = []
1491
-
1492
- # Lists to hold Output Table Information.
1493
- self._func_output_args = None
1494
- self._function_output_table_map = {}
1495
- self._volatile_output = False
1496
-
1497
- def _generate_query(self, volatile=False):
1498
- """
1499
- DESCRIPTION:
1500
- Function to generate the SQL query for UAF function.
1501
-
1502
- RETURNS:
1503
- None.
1504
-
1505
- RAISES:
1506
- None.
1507
-
1508
- EXAMPLES:
1509
- self._generate_query()
1510
- """
1511
- query_generator = UAFQueryGenerator(function_name=self._metadata.sql_function_name,
1512
- func_input_args=self._func_input_args,
1513
- func_input_filter_expr_args=self._func_input_filter_expr_args,
1514
- func_other_args=self._func_other_args ,
1515
- func_output_args=self._func_output_args,
1516
- func_input_fmt_args=self._func_input_fmt_arguments,
1517
- func_output_fmt_args=self._func_output_fmt_arguments,
1518
- volatile_output=volatile)
1519
- self.sqlmr_query= query_generator._get_display_uaf()
1520
-
1521
- def _process_input_argument(self, **kwargs):
1522
- """
1523
- DESCRIPTION:
1524
- Function to process input argument(s).
1525
-
1526
- PARAMETERS:
1527
- kwargs:
1528
- Specifies the keyword arguments passed to a function.
1529
-
1530
- RETURNS:
1531
- None.
1532
-
1533
- RAISES:
1534
- None.
1535
-
1536
- EXAMPLES:
1537
- self._process_input_argument()
1538
- """
1539
- from teradataml.dataframe.sql_interfaces import ColumnExpression
1540
-
1541
- # Process the Input tables.
1542
- # Get the list of input arguments from the JsonStore metadata
1543
- for input_attribute in self._metadata.input_tables:
1544
- # Get the input table arg name.
1545
- input_table_arg = input_attribute.get_lang_name()
1546
-
1547
- # Get the value of input table arg.
1548
- input_table_arg_value = kwargs.get(input_table_arg, None)
1549
- self._validate_analytic_function_argument(input_table_arg,
1550
- input_table_arg_value,
1551
- input_attribute)
1552
-
1553
- # Form the 'filter_expr' key name (User provided input).
1554
- filter_exp_arg = "{}_filter_expr".format(input_table_arg)
1555
- # Get the 'filter_expr' value.
1556
- filter_exp_arg_value = kwargs.get(filter_exp_arg, None)
1557
-
1558
- # If 'filter_expr' is passed and 'data' is None, raise
1559
- # dependent argument exception.
1560
- if filter_exp_arg_value is not None and \
1561
- input_table_arg_value is None:
1562
- # Raise error, if "data" not provided and "data_filter_expr" is provided.
1563
- err_ = Messages.get_message(MessageCodes.DEPENDENT_ARGUMENT,
1564
- filter_exp_arg,
1565
- input_table_arg)
1566
- raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARGUMENT)
1567
-
1568
- # 'filter_expr' argument validation (User provided input).
1569
- arg_info = []
1570
- arg_info.append([filter_exp_arg, filter_exp_arg_value, True,
1571
- (ColumnExpression), False])
1572
-
1573
- # Validate 'filter_expr' argument types (User provided input).
1574
- _Validators._validate_function_arguments(arg_info)
1575
-
1576
- # If data is not None, then add 'data' and 'filter_expr' to lists.
1577
- if input_table_arg_value is not None:
1578
- # Append the lists.
1579
- self._func_input_args.append(input_table_arg_value)
1580
- self._func_input_filter_expr_args.append(filter_exp_arg_value)
1581
-
1582
- def _process_function_output(self, **kwargs):
1583
- """
1584
- DESCRIPTION:
1585
- Internal function to process the output tables. This function creates
1586
- the required output DataFrames from the tables and a result list.
1587
-
1588
- PARAMETERS:
1589
- None.
1590
-
1591
- RETURNS:
1592
- None.
1593
-
1594
- RAISES:
1595
- None.
1596
-
1597
- EXAMPLES:
1598
- self._process_function_output()
1599
- """
1600
- volatile = kwargs.get("volatile", False)
1601
- persist = kwargs.get("persist", False)
1602
- output_db_name = kwargs.get("output_db_name")
1603
-
1604
- # Since the regular function will always refer to latest value, creating
1605
- # a closure here. The function will go as an attribute to dynamically
1606
- # created object.
1607
- def _parent(layer_name, table_name, query=None):
1608
-
1609
- def _layer(self):
1610
- if self._data.get(layer_name) is None:
1611
- from teradataml import DataFrame, in_schema
1612
- # Execute the Query, create a DataFrame and attach it.
1613
- if query:
1614
- UtilFuncs._execute_query(query=query)
1615
- _db_name, _table_name = UtilFuncs._extract_db_name(table_name), \
1616
- UtilFuncs._extract_table_name(table_name)
1617
- _table_name = in_schema(_db_name, _table_name) if _db_name else _table_name
1618
- self._data[layer_name] = DataFrame.from_table(table_name)
1619
-
1620
- return self._data[layer_name]
1621
-
1622
- return _layer
1623
-
1624
- for output_table in self._metadata.output_tables[1:]:
1625
- layer_name = output_table.get_layer_name()
1626
- exposed_layer_name = output_table.get_lang_name()
1627
-
1628
- # Creating the ART Spec here instead of creating an object of TDSeries to
1629
- # save additional imports and processing.
1630
- _art_spec = "ART_SPEC(TABLE_NAME({}), LAYER({}))".format(self._function_output_table_map["result"],
1631
- layer_name)
1632
-
1633
- # Generate table name.
1634
- func_params = self._get_generate_temp_table_params(persist=persist,
1635
- output_db=output_db_name)
1636
- _table_name = UtilFuncs._generate_temp_table_name(**func_params)
1637
-
1638
- # Generate Query.
1639
- UAF_Query = UAFQueryGenerator(function_name="TD_EXTRACT_RESULTS",
1640
- func_input_args=_art_spec,
1641
- func_input_filter_expr_args={},
1642
- func_other_args={},
1643
- func_input_fmt_args={},
1644
- func_output_args=_table_name,
1645
- volatile_output=volatile,
1646
- ctas=True)
1647
-
1648
- query = UAF_Query._get_display_uaf()
1649
-
1650
- # Store the internal function in a dict. While storing it, convert it to
1651
- # a property so user do not need to call it.
1652
- self._dyn_cls_data_members[exposed_layer_name] = property(
1653
- _parent(exposed_layer_name, _table_name, query))
1654
-
1655
- # 'result' attribute in UAF Function object should point to output table.
1656
- self._dyn_cls_data_members["result"] = property(
1657
- _parent("result", self._function_output_table_map["result"]))
1658
-
1659
- # To make lazy execution, we will add additional attributes to UAF Function object.
1660
- # Mask those additional attributes by overwriting the __dir__ method.
1661
- attrs = list(self._dyn_cls_data_members.keys())
1662
- self._dyn_cls_data_members["__dir__"] = lambda self: super(self.__class__).__dir__() + attrs
1663
-
1664
- # Add a variable _data to output object so that the layers DataFrame
1665
- # will be stored in this variable.
1666
- self._dyn_cls_data_members["_data"] = {}
1667
-
1668
- def _get_generate_temp_table_params(self, persist=False, output_db=None, volatile=False):
1669
- """
1670
- DESCRIPTION:
1671
- Function to get the required parameters to create either table or view.
1672
- When function has output table arguments or argument persist is set to True,
1673
- then function returns parameters to create table otherwise returns parameters
1674
- to create view. If persist is set to True or volatile is set to True, in such cases,
1675
- tables created are not garbage collected.
1676
-
1677
- PARAMETERS:
1678
- persist:
1679
- Optional Argument.
1680
- Specifies whether to persist the output table or not.
1681
- When set to True, output tables created are not garbage collected
1682
- at the end of the session, otherwise they are garbage collected.
1683
- Default Value: False
1684
- Types: bool
1685
-
1686
- output_db:
1687
- Optional Argument.
1688
- Specifies the output DataBase name to create the output tables.
1689
- Default Value: False
1690
- Types: str
1691
-
1692
- volatile:
1693
- Optional Argument.
1694
- Specifies whether table to create is a volatile table or not.
1695
- Default Value: False
1696
- Types: bool
1697
-
1698
- RETURNS:
1699
- dict
1700
-
1701
- RAISES:
1702
- None
1703
-
1704
- EXAMPLES:
1705
- self._get_generate_temp_table_params(True, True)
1706
- """
1707
- prefix = "td_uaf_out_"
1708
-
1709
- # If result is to be persisted then, it must not be Garbage collected.
1710
- gc_on_quit = False if persist or volatile else True
1711
-
1712
- return {"table_type": TeradataConstants.TERADATA_TABLE,
1713
- "prefix": prefix,
1714
- "gc_on_quit": gc_on_quit,
1715
- "databasename": output_db if output_db else _get_context_temp_databasename()}
1716
-
1717
- def _process_output_argument(self, **kwargs):
1718
- """
1719
- DESCRIPTION:
1720
- Function to process output argument(s) of UAF function.
1721
-
1722
- PARAMETERS:
1723
- kwargs:
1724
- Specifies the keyword arguments passed to a function.
1725
-
1726
- RETURNS:
1727
- None.
1728
-
1729
- RAISES:
1730
- TypeError, ValueError, TeradataMlException.
1731
-
1732
- EXAMPLES:
1733
- self._process_output_argument()
1734
- """
1735
- # If kwargs not provided, initialize it with default value.
1736
- volatile = kwargs.get("volatile", False)
1737
- persist = kwargs.get("persist", False)
1738
- output_table_name = kwargs.get("output_table_name", None)
1739
- output_db_name = kwargs.get("output_db_name", None)
1740
-
1741
- arg_info = []
1742
- arg_info.append(["volatile", volatile, False, (bool)])
1743
- arg_info.append(["persist", persist, False, (bool)])
1744
- arg_info.append(["output_table_name", output_table_name, True, (str), True])
1745
- arg_info.append(["output_db_name", output_db_name, True, (str), True])
1746
-
1747
- _Validators._validate_function_arguments(arg_info)
1748
-
1749
- # If table is name is not provided by user, generate the temp table name.
1750
- # Else, get fully qualified table name.
1751
- if output_table_name is None:
1752
- # Generate the name of the table, if not provide by user.
1753
- func_params = self._get_generate_temp_table_params(persist=persist,
1754
- output_db=output_db_name,
1755
- volatile=volatile)
1756
-
1757
- # Generate temp table name and add it to garbage collector.
1758
- table_name = UtilFuncs._generate_temp_table_name(**func_params)
1759
- else:
1760
- # If database name is not provided by user, get the default database name
1761
- # else use user provided database name.
1762
- db_name = output_db_name if output_db_name is not None else \
1763
- _get_context_temp_databasename()
1764
-
1765
- # Get the fully qualified table name.
1766
- table_name = "{}.{}".format(UtilFuncs._teradata_quote_arg(db_name,
1767
- "\"", False),
1768
- UtilFuncs._teradata_quote_arg(output_table_name,
1769
- "\"", False))
1770
-
1771
- # If persist is set to False, add the table name to
1772
- # Garbage collector.
1773
- if not persist:
1774
- GarbageCollector._add_to_garbagecollector(table_name)
1775
-
1776
- # Populate the output arg, output table map and volatile output.
1777
- self._func_output_args = table_name
1778
- self._function_output_table_map["result"] = table_name
1779
- self._volatile_output = volatile
1780
-
1781
- def __process_individual_argument(self, argument, **kwargs):
1782
- """
1783
- DESCRIPTION:
1784
- Internal function to process the individual arguments.
1785
- 1. If the argument does not have nested parameters and is present in kwargs,
1786
- the function does the following:
1787
- * Checks the required arguments are passed or not.
1788
- * Checks the type of the arguments are expected or not.
1789
- * Checks for permitted values.
1790
- * Checks for empty string.
1791
- * If validations run fine,
1792
- then returns a dict with the SQL name of the argument as key
1793
- and user provided value as the value.
1794
- * Dictornary without nested parameters is formed as below:
1795
- {arg_sql_name : value}
1796
- 2. If the argument has nested params:
1797
- * Function loops over the nested parameter and calls itself recursively
1798
- on the nested parameters and repeats the process.
1799
- * Dictonary with nested arguments are formed as below:
1800
- { Parent_sql_name : { Child1_sql_name : value, Child2_sql_name : value}}
1801
-
1802
- PARAMETERS:
1803
- argument:
1804
- Required Argument.
1805
- Specifies the argument object (_AnlyFuncArgument).
1806
- Types: _AnlyFuncArgument
1807
-
1808
- kwargs:
1809
- Specifies the keyword arguments passed to a function.
1810
-
1811
- RETURNS:
1812
- None.
1813
-
1814
- RAISES:
1815
- ValueError OR TypeError OR TeradataMlException.
1816
-
1817
- EXAMPLES:
1818
- self._process_other_arguments(argument, arg1="string", arg2="db", arg3=2)
1819
-
1820
- """
1821
- sql_name = argument.get_name()
1822
- lang_name = argument.get_lang_name()
1823
- arg_value = kwargs.get(lang_name)
1824
- # Set the "argument".
1825
- self._spl_func_obj.set_arg_name(argument)
1826
- # Let's get spl handler if function requires.
1827
- special_case_handler = self._spl_func_obj._get_handle()
1828
-
1829
- if len(argument.get_nested_param_list()) == 0:
1830
- self._validate_analytic_function_argument(lang_name, arg_value, argument)
1831
- # If argument is not None and it is not equal to the default value,
1832
- # add the sql_name and arg_value to the dict else return an empty dict
1833
- if arg_value is not None and arg_value != argument.get_default_value():
1834
-
1835
- # If get_match_length_of_arguments is True, check if the arg_value is
1836
- # a list and of the required size.
1837
- if argument.get_match_length_of_arguments():
1838
- required_length = argument.get_required_length()
1839
- if (isinstance(arg_value, list) and len(arg_value) != required_length) or\
1840
- (not isinstance(arg_value, list)):
1841
- raise TeradataMlException(Messages.get_message(
1842
- MessageCodes.INVALID_LIST_LENGTH).format(lang_name,
1843
- required_length),
1844
- MessageCodes.INVALID_LIST_LENGTH)
1845
-
1846
- # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
1847
- # Check lower bound and upper bound for numeric arguments.
1848
- if isinstance(arg_value, (int, float)):
1849
- lower_bound_inclusive = argument.get_lower_bound_type() == "INCLUSIVE"
1850
- upper_bound_inclusive = argument.get_upper_bound_type() == "INCLUSIVE"
1851
- _Validators._validate_argument_range(arg_value,
1852
- lang_name,
1853
- lbound=argument.get_lower_bound(),
1854
- ubound=argument.get_upper_bound(),
1855
- lbound_inclusive=lower_bound_inclusive,
1856
- ubound_inclusive=upper_bound_inclusive)
1857
-
1858
- # If the argument is a bool type, convert it to integer since SQL do
1859
- # not know boolean processing.
1860
- if bool in argument.get_python_type() and isinstance(arg_value, bool):
1861
- arg_value = int(arg_value)
1862
-
1863
- # Handle special cases for "arg_values" based on handling method.
1864
- arg_value = special_case_handler(arg_value) if special_case_handler is not None else arg_value
1865
- return {sql_name : arg_value}
1866
- return {}
1867
- else:
1868
- temp_dict = {}
1869
- for nested_arg in argument.get_nested_param_list():
1870
- temp_dict.update(self.__process_individual_argument(nested_arg, **kwargs))
1871
- return_dict = {sql_name : temp_dict} if temp_dict else {}
1872
- return return_dict
1873
-
1874
- def _process_other_argument(self, **kwargs):
1875
- """
1876
- DESCRIPTION:
1877
- Function to process the metadata arguments. It does the following:
1878
- * Iterates over the metadata arguments, calls __process_individual_argument
1879
- for each argument and populates the dict '_func_other_args'.
1880
-
1881
- PARAMETERS:
1882
- kwargs:
1883
- Specifies the keyword arguments passed to a function.
1884
-
1885
- RETURNS:
1886
- None.
1887
-
1888
- RAISES:
1889
- ValueError OR TypeError OR TeradataMlException.
1890
-
1891
- EXAMPLES:
1892
- self._process_other_arguments(arg1="string", arg2="db", arg3=2)
1893
- """
1894
- for argument in self._metadata.arguments:
1895
- self._func_other_args.update(self.__process_individual_argument(argument, **kwargs))
1896
-
1897
- # Process the InputFmt arguments.
1898
- for input_fmt_argument in self._metadata.input_fmt_arguments:
1899
- self._func_input_fmt_arguments.update(
1900
- self.__process_individual_argument(input_fmt_argument,
1901
- **kwargs))
1902
-
1903
- # Process the OutputFmt arguments.
1904
- for output_fmt_argument in self._metadata.output_fmt_arguments:
1905
- self._func_output_fmt_arguments.update(
1906
- self.__process_individual_argument(output_fmt_argument,
1907
- **kwargs))
1908
-
1909
- def _execute_query(self, persist=False, volatile=None):
1910
- """
1911
- DESCRIPTION:
1912
- Function to execute query on Vantage.
1913
-
1914
- PARAMETERS:
1915
- persist:
1916
- Optional Argument.
1917
- Specifies whether to persist a table or not.
1918
- Default Value: False
1919
- Type: bool
1920
-
1921
- RETURNS:
1922
- None
1923
-
1924
- RAISES:
1925
- TeradataMlException
1926
-
1927
- EXAMPLES:
1928
- self._execute_query()
1929
- """
1930
- try:
1931
- # Execute already generated query.
1932
- UtilFuncs._execute_query(query=self.sqlmr_query)
1933
-
1934
- if persist:
1935
- # SQL is already executed. So, print the table names.
1936
- for output_attribute, table_name in self._function_output_table_map.items():
1937
- print("{} data stored in table '{}'".format(output_attribute, table_name))
1938
-
1939
- except Exception as emsg:
1940
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(emsg)),
1941
- MessageCodes.TDMLDF_EXEC_SQL_FAILED)
1942
-
1943
-
1944
- class _BYOMFunctionExecutor(_SQLEFunctionExecutor):
1945
- def __init__(self, func_name):
1946
- """
1947
- DESCRIPTION:
1948
- Constructor for the class.
1949
-
1950
- PARAMETERS:
1951
- func_name:
1952
- Required Argument.
1953
- Specifies the name of the analytic function, which is exposed to the user.
1954
- Types: str
1955
-
1956
- RAISES:
1957
- None
1958
-
1959
- EXAMPLES:
1960
- _BYOMFunctionExecutor("ONNXPredict")
1961
- """
1962
- super().__init__(func_name, TeradataAnalyticFunctionTypes.BYOM.value)
1963
-
1964
- def _generate_query(self, volatile=False):
1965
- """
1966
- DESCRIPTION:
1967
- Function to generate the SQL query for BYOM analytic function.
1968
-
1969
- PARAMETERS:
1970
- volatile:
1971
- Optional Argument.
1972
- Specifies whether to create a volatile table or not.
1973
- Default Value: False
1974
- Type: bool
1975
-
1976
- RETURNS:
1977
- None.
1978
-
1979
- RAISES:
1980
- None.
1981
-
1982
- EXAMPLES:
1983
- self._generate_query()
1984
- """
1985
- # Check for byom install location and
1986
- # update the db_name.
1987
- db_name = None
1988
- if configure.byom_install_location is not None:
1989
- db_name = configure.byom_install_location
1990
-
1991
- self.__aqg_obj = AnalyticQueryGenerator(function_name=self._metadata.sql_function_name,
1992
- func_input_arg_sql_names=self._func_input_arg_sql_names,
1993
- func_input_table_view_query=self._func_input_table_view_query,
1994
- func_input_dataframe_type=self._func_input_dataframe_type,
1995
- func_input_distribution=self._func_input_distribution,
1996
- func_input_partition_by_cols=self._func_input_partition_by_cols,
1997
- func_input_order_by_cols=self._func_input_order_by_cols,
1998
- func_other_arg_sql_names=self._func_other_arg_sql_names,
1999
- func_other_args_values=self._func_other_args,
2000
- func_other_arg_json_datatypes=self._func_other_arg_json_datatypes,
2001
- func_output_args_sql_names=self._func_output_args_sql_names,
2002
- func_output_args_values=self._func_output_args,
2003
- engine="ENGINE_SQL",
2004
- db_name=db_name,
2005
- volatile_output=volatile,
2006
- skip_config_lookup=True,
2007
- func_input_local_order=self._func_input_local_order)
2008
-
2009
- # Invoke call to SQL-MR generation.
2010
- self.sqlmr_query = self.__aqg_obj._gen_sqlmr_select_stmt_sql()
1
+ """
2
+ Unpublished work.
3
+ Copyright (c) 2021 by Teradata Corporation. All rights reserved.
4
+ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
5
+
6
+ Primary Owner: pradeep.garre@teradata.com
7
+ Secondary Owner: PankajVinod.Purandare@teradata.com
8
+
9
+ This file implements several classes which executes analytic functions such as
10
+ SQLE functions and UAF functions on Vantage.
11
+ File implements classes for following:
12
+ * _AnlyticFunctionExecutor
13
+ * _SQLEFunctionExecutor
14
+ * _TableOperatorExecutor
15
+ * _BYOMFunctionExecutor
16
+ """
17
+
18
+ from teradataml.options.configure import configure
19
+ from teradataml.common.constants import TeradataConstants, TeradataAnalyticFunctionTypes
20
+ from teradataml.analytics.json_parser import PartitionKind
21
+ from teradataml.analytics.analytic_query_generator import AnalyticQueryGenerator, UAFQueryGenerator
22
+ from teradataml.analytics.json_parser.json_store import _JsonStore
23
+ from teradataml.analytics.utils import FuncSpecialCaseHandler
24
+ from teradataml.options.display import display
25
+ from teradataml.common.exceptions import TeradataMlException
26
+ from teradataml.common.garbagecollector import GarbageCollector
27
+ from teradataml.common.messages import Messages, MessageCodes
28
+ from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
29
+ from teradataml.common.utils import UtilFuncs
30
+ from teradataml.context.context import _get_context_temp_databasename
31
+ from teradataml.table_operators.table_operator_query_generator import TableOperatorQueryGenerator
32
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband
33
+ from teradataml.utils.dtypes import _ListOf
34
+ from teradataml.utils.validators import _Validators
35
+
36
+ import time
37
+
38
+
39
class _AnlyticFunctionExecutor:
    """
    Class to hold the common attributes to execute analytic function.

    NOTE(review): class name is missing an 'a' ("Anlytic"); kept as-is since
    subclasses inherit from this exact name.
    """
    def __init__(self, func_name, func_type):
        """
        DESCRIPTION:
            Constructor for the class.

        PARAMETERS:
            func_name:
                Required Argument.
                Specifies the name of the analytic function, which is exposed to user.
                Types: str

            func_type:
                Required Argument.
                Specifies whether the argument "func_name" is SQLE, UAF or Table Operator function.
                Types: str

        RAISES:
            TypeError OR ValueError OR TeradataMlException
        """
        self.func_name = func_name
        self._func_type = func_type

        # Input arguments passed, i.e., data members of the dynamic class to be generated.
        self._dyn_cls_data_members = {}

        # Output table arguments list.
        self._func_output_args_sql_names = []
        self._func_output_args = []
        self._func_output_table_type = []

        # Generate lists for rest of the function arguments. The three lists
        # run parallel to each other (SQL name, value, JSON datatype).
        self._func_other_arg_sql_names = []
        self._func_other_args = []
        self._func_other_arg_json_datatypes = []
        # Full SQL-MR SELECT statement; populated by the child's _generate_query().
        self.sqlmr_query = None
        # Maps output attribute name (e.g. "result") -> table/view name on Vantage.
        self._function_output_table_map = {}
        self._sql_specific_attributes = {}
        # Function metadata parsed from the packaged JSON definition.
        self._metadata = _JsonStore.get_function_metadata(self.func_name)
        self._mlresults = []
        self._awu = AnalyticsWrapperUtils()
        # Wall-clock execution time in seconds; set by _execute_function().
        self.__build_time = None
        # Duck-typed check so the teradataml DataFrame class need not be imported here.
        self._is_argument_dataframe = lambda object: type(object).__name__ == "DataFrame"

        # Initialize FuncSpecialCaseHandler.
        self._spl_func_obj = FuncSpecialCaseHandler(self.func_name)

        # Initialize database object type; may be switched to a table type
        # later by _execute_function() depending on persist/volatile/outputs.
        self.db_object_type = TeradataConstants.TERADATA_VIEW
92
@staticmethod
def _validate_analytic_function_argument(func_arg_name, func_arg_value, argument, additional_valid_types=None):
    """
    DESCRIPTION:
        Validate a single analytic function argument against its metadata.
        Performs, in order:
          * missing mandatory argument check,
          * expected type check,
          * empty value check (for column-name style arguments),
          * permitted values check.

    PARAMETERS:
        func_arg_name:
            Required Argument.
            Specifies the name of the argument.
            Type: str

        func_arg_value:
            Required Argument.
            Specifies the value passed to argument 'func_arg_name' in analytic function.
            Type: str OR float OR int OR list

        argument:
            Required Argument.
            Specifies the argument object (_AnlyArgumentBase) containing argument
            information to be validated.
            Type: _AnlyFuncArgument OR _AnlyFuncInput

        additional_valid_types:
            Optional Argument.
            Specifies extra Python types to accept in addition to the
            type(s) declared in the metadata.
            Type: type OR tuple of types

    RETURNS:
        None

    RAISES:
        ValueError OR TypeError

    EXAMPLES:
        self._validate_analytic_function_argument("arg", 1, metadata.arguments)
    """
    # Merge any additionally accepted types into a single tuple spec.
    accepted_types = argument.get_python_type()
    if additional_valid_types:
        extra = additional_valid_types if isinstance(additional_valid_types, tuple) \
            else (additional_valid_types,)
        accepted_types = (accepted_types,) + extra

    # Make sure that a non-NULL value has been supplied for all mandatory arguments.
    arg_spec = [func_arg_name,
                func_arg_value,
                not argument.is_required(),
                accepted_types]
    _Validators._validate_missing_required_arguments([arg_spec])

    # Reject empty strings if argument accepts a column name for either input or output.
    if argument.is_output_column() or not argument.is_empty_value_allowed():
        arg_spec.append(True)

    # Constrain to the permitted values, if any are declared.
    permitted_values = argument.get_permitted_values()
    if permitted_values:
        # Positional validator spec: pad the empty-check slot when absent.
        if len(arg_spec) == 4:
            arg_spec.append(True)
        arg_spec.append(permitted_values)

    # Validate the function arguments.
    _Validators._validate_function_arguments([arg_spec])
157
@collect_queryband(attr="func_name")
def _execute_query(self, persist=False, volatile=False):
    """
    DESCRIPTION:
        Function to execute query on Vantage. The generated SQL-MR query is
        materialized either as a view or a (volatile/persistent) table and
        the resulting object name is recorded in "_function_output_table_map".

    PARAMETERS:
        persist:
            Optional Argument.
            Specifies whether to persist the result in a table or not.
            Default Value: False
            Type: bool

        volatile:
            Optional Argument.
            Specifies whether to create a volatile table or not.
            Default Value: False
            Type: bool

    RETURNS:
        None

    RAISES:
        TeradataMlException - When the SQL execution fails on Vantage.

    EXAMPLES:
        self._execute_query()
    """
    # Generate STDOUT table name and add it to the output table list.
    func_params = self._get_generate_temp_table_params(persist=persist, volatile=volatile)
    sqlmr_stdout_temp_tablename = UtilFuncs._generate_temp_table_name(**func_params)

    # Default to table creation; switch to view creation when the target
    # database object type is a view.
    __execute = UtilFuncs._create_table
    __execute_params = (sqlmr_stdout_temp_tablename, self.sqlmr_query, volatile)
    if func_params["table_type"] == TeradataConstants.TERADATA_VIEW:
        __execute = UtilFuncs._create_view
        __execute_params = (sqlmr_stdout_temp_tablename, self.sqlmr_query)

    try:
        __execute(*__execute_params)

        # List stores names of the functions that will produce "output" attribute
        # when more than one results are expected.
        output_attr_functions = ["BincodeFit", "ChiSq", "PolynomialFeaturesFit",
                                 "RowNormalizeFit", "ScaleFit", "SimpleImputeFit"]

        # Store the result table in map.
        if self.func_name in output_attr_functions:
            self._function_output_table_map["output"] = sqlmr_stdout_temp_tablename
        else:
            self._function_output_table_map["result"] = sqlmr_stdout_temp_tablename

        if persist:
            # SQL is executed. So, print the table/view names.
            for output_attribute, table_name in self._function_output_table_map.items():
                print("{} data stored in table '{}'".format(output_attribute, table_name))

    except Exception as err:
        # Fix: chain the original exception ("from err") so the underlying
        # database error and traceback are preserved as __cause__.
        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(err)),
                                  MessageCodes.TDMLDF_EXEC_SQL_FAILED) from err
218
+ def _get_generate_temp_table_params(self, persist=False, volatile=False):
219
+ """
220
+ DESCRIPTION:
221
+ Function to get the required parameters to create either table or view.
222
+ When function has output table arguments or argument persist is set to True,
223
+ then function returns parameters to create table otherwise returns parameters
224
+ to create view. If persist is set to True or volatile is set to True, in such cases,
225
+ tables created are not GC'ed.
226
+
227
+ PARAMETERS:
228
+ persist:
229
+ Optional Argument.
230
+ Specifies whether to persist the output table or not.
231
+ When set to True, output tables created are not garbage collected
232
+ at the end of the session, otherwise they are garbage collected.
233
+ Default Value: False
234
+ Types: bool
235
+
236
+ volatile:
237
+ Optional Argument.
238
+ Specifies whether to create the output table as volatile table or not.
239
+ When set to True, output tables created are garbage collected
240
+ at the end of the session, otherwise they are not garbage collected.
241
+ Default Value: False
242
+ Types: bool
243
+
244
+ RETURNS:
245
+ dict
246
+
247
+ RAISES:
248
+ None
249
+
250
+ EXAMPLES:
251
+ self._get_generate_temp_table_params(True, True)
252
+ """
253
+ use_default_database = True
254
+ prefix = "td_sqlmr_out_"
255
+ gc_on_quit = True
256
+
257
+ # If result is to be persisted or if the table is a volaile table then,
258
+ # it must not be Garbage collected.
259
+ if persist or volatile:
260
+ gc_on_quit = False
261
+ prefix = "td_sqlmr_{}_out_".format("persist" if persist else "volatile")
262
+ use_default_database = False if volatile else True
263
+
264
+ return {"use_default_database": use_default_database,
265
+ "table_type": self.db_object_type,
266
+ "prefix": prefix,
267
+ "gc_on_quit": gc_on_quit}
268
+
269
def _process_output_argument(self, persist=False, volatile=False, **kwargs):
    """
    DESCRIPTION:
        Function to process output argument(s) of analytic function.
        For every output table declared in the metadata, generates a temp
        table name and records it in the output argument lists and in
        "_function_output_table_map" (keyed by the output argument's lang name).

    PARAMETERS:
        persist:
            Optional Argument.
            Specifies whether to persist the output table or not.
            When set to True, output tables created are not garbage collected
            at the end of the session, otherwise they are garbage collected.
            Default Value: False
            Types: bool

        volatile:
            Optional Argument.
            Specifies whether to create the output table as volatile table or not.
            When set to True, output tables created are garbage collected
            at the end of the session, otherwise they are not garbage collected.
            Default Value: False
            Types: bool

        kwargs:
            Specifies the keyword arguments passed to a function.

    RETURNS:
        None.

    RAISES:
        None.

    EXAMPLES:
        self._process_output_argument()
    """
    # Process the output_tables argument(s) of the metadata.
    for output_argument in self._metadata.output_tables:
        lang_name = output_argument.get_lang_name()

        # Generate the name of the table.
        func_params = self._get_generate_temp_table_params(persist=persist, volatile=volatile)
        temp_table_name = UtilFuncs._generate_temp_table_name(**func_params)

        # By default, populate the output table lists irrespective of 'is_required'. However,
        # if the output table has a dependent argument, then check for the dependent argument
        # value and decide whether to populate output table lists or not.
        populate_output_tables = True
        dependent_argument = output_argument.get_is_required_dependent_argument()
        if dependent_argument is not None:
            # Dependent argument can be input_tables or arguments or output_tables.
            # Get the analytic function arguments based on the argument type and
            # check whether dependency is satisfied or not.
            for arg in getattr(self._metadata, dependent_argument.type):
                if arg.get_sql_name() == dependent_argument.sql_name:
                    # Bug fix: use a dedicated variable for the dependent
                    # argument's lang name. The earlier code reassigned
                    # "lang_name" here, so the output table ended up keyed in
                    # "_function_output_table_map" under the dependent
                    # argument's name instead of the output argument's name.
                    dependent_lang_name = arg.get_lang_name()
                    dependent_lang_name_val = kwargs.get(dependent_lang_name)
                    if not dependent_argument.is_required(dependent_lang_name_val):
                        populate_output_tables = False
                    break

        if populate_output_tables:
            self._func_output_args_sql_names.append(output_argument.get_sql_name())
            self._func_output_args.append(temp_table_name)
            self._function_output_table_map[lang_name] = temp_table_name
335
def _process_other_argument(self, **kwargs):
    """
    DESCRIPTION:
        Function to process other arguments. This function does the following:
            * Checks the required arguments are passed or not.
            * Checks the type of the arguments are expected or not.
            * If argument accepts only specified values, function checks whether
              the value passed is in the specified values or not.
            * If all the checks pass, it then populates the corresponding lists
              with respective values.

    PARAMETERS:
        kwargs:
            Specifies the keyword arguments passed to a function.

    RETURNS:
        None.

    RAISES:
        ValueError OR TypeError OR TeradataMlException.

    EXAMPLES:
        self._process_other_arguments(arg1="string", arg2="db", arg3=2)
    """
    sequence_input_by_list = []

    # Before populating the corresponding lists, make sure to empty those so
    # duplicates won't be populated even if analytic function execution happens twice.
    self._func_other_arg_sql_names = []
    self._func_other_args = []
    self._func_other_arg_json_datatypes = []

    # Let's process formula argument.
    if len(self._metadata.formula_args) > 0:
        formula = kwargs.pop("formula", None)

        # If formula is passed, process formula argument,
        # else process components of formula individually as a
        # part of normal function argument processing.
        formula_comp_provided = False
        formula_comp_args = []
        if formula is not None:
            _Validators._validate_function_arguments([["formula", formula, True, str, True]])
            input_data = kwargs.get(self._metadata.formula_args[0].get_target_table_lang_name())
            formula_obj = AnalyticsWrapperUtils()._validate_formula_notation(formula, input_data, "formula")

        for formula_arg_component in self._metadata.formula_args:
            # Check if this formula argument component is separately provided
            # along with 'formula'. If so, raise error.
            formula_arg_value = kwargs.get(formula_arg_component.get_lang_name(), None)
            formula_comp_args.append(formula_arg_component.get_lang_name())
            if formula_arg_value is not None or formula_comp_provided:
                formula_comp_provided = True
            elif formula is not None:
                # Processing dependent component of formula.
                if formula_arg_component.get_r_order_number() == 0:
                    __response_column = formula_obj._get_dependent_vars()
                    if len(__response_column) > 0:
                        kwargs[formula_arg_component.get_lang_name()] = __response_column

                # Processing non-dependent components of formula.
                # Non-dependent components of formula can consist columns of either all, numeric
                # or categorical type.
                else:
                    if formula_arg_component.get_r_order_number() == -1:
                        allowed_types_list = formula_arg_component.get_allowed_type_groups()
                        json_to_python_type_map = {"NUMERIC": "numerical",
                                                   "NUMERICAL": "numerical"
                                                   }
                        col_data_type = json_to_python_type_map.get(allowed_types_list[0], "all")
                    elif formula_arg_component.get_r_order_number() == -2:
                        col_data_type = "categorical"

                    # NOTE(review): col_data_type is only assigned for
                    # r_order_number -1/-2; any other non-zero value would
                    # raise NameError here. Presumably the JSON metadata
                    # guarantees only 0/-1/-2 — TODO confirm.
                    __columns = AnalyticsWrapperUtils()._get_columns_by_type(formula_obj,
                                                                             input_data,
                                                                             col_data_type)
                    if len(__columns) > 0:
                        kwargs[formula_arg_component.get_lang_name()] = __columns
        # Pass dummy value to validator if any of the formula component argument is provided.
        # Else pass None.
        _Validators._validate_mutually_exclusive_arguments(formula, "formula",
                                                           1 if formula_comp_provided else None,
                                                           formula_comp_args)

    # Let's process all other arguments.
    for argument in self._metadata.arguments:
        sql_name = argument.get_sql_name()
        arg_name = argument.get_lang_name()
        arg_value = kwargs.get(arg_name)
        # Set the "argument".
        self._spl_func_obj.set_arg_name(argument)
        # Let's get spl handler if function requires.
        special_case_handler = self._spl_func_obj._get_handle()

        self._validate_analytic_function_argument(arg_name, arg_value, argument)

        # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
        # Check lower bound and upper bound for number type of arguments.
        if isinstance(arg_value, (int, float)):

            lower_bound_inclusive = argument.get_lower_bound_type() == "INCLUSIVE"
            upper_bound_inclusive = argument.get_upper_bound_type() == "INCLUSIVE"
            _Validators._validate_argument_range(arg_value,
                                                 arg_name,
                                                 lbound=argument.get_lower_bound(),
                                                 ubound=argument.get_upper_bound(),
                                                 lbound_inclusive=lower_bound_inclusive,
                                                 ubound_inclusive=upper_bound_inclusive)

        # A column argument without a target table is a packaging error in
        # the function's JSON definition, not a user error.
        if argument.is_column_argument() and not argument.get_target_table():
            raise TeradataMlException(
                Messages.get_message(MessageCodes.INVALID_JSON, "{}.json".format(self._metadata.sql_function_name),
                                     "Argument '{}' is specified as column argument but "
                                     "is Target table is not specified".format(sql_name)), MessageCodes.INVALID_JSON)

        # Additional Validations if argument is a Column name.
        if argument.is_column_argument() and argument.get_target_table():
            target_table_argument_name = argument.get_target_table_lang_name()
            dataframe = kwargs.get(target_table_argument_name)
            # Input table can be an object of MLE Functions too.
            if not self._is_argument_dataframe(dataframe) and dataframe is not None:
                dataframe = dataframe._mlresults[0]

            # Validate column is existed or not in the table.
            _Validators._validate_dataframe_has_argument_columns(
                arg_value, arg_name, dataframe, target_table_argument_name)

            # Append square brackets for column range when function
            # does not require special case handler.
            arg_value = self._spl_func_obj._add_square_bracket(arg_value)

        # SequenceInputBy arguments require special processing: they are
        # collected here and emitted as one combined clause after the loop.
        if 500 <= argument.get_r_order_number() <= 510:
            quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
            sequence_input_by_list.append("{}:{}".format(sql_name, quoted_value))
            continue

        # Skip arguments left at their metadata default: the SQL clause is
        # only emitted for explicitly overridden values.
        if arg_value is not None and arg_value != argument.get_default_value():

            # Specific type of arguments required to be passed in a single quote.
            # Append quote if argument requires it.

            # Handle special cases for arg_values based on function handler.
            arg_value = special_case_handler(arg_value, self._quote_collapse_other_args) \
                if special_case_handler is not None \
                else self._quote_collapse_other_args(argument, arg_value)

            self._func_other_arg_sql_names.append(sql_name)
            self._func_other_args.append(arg_value)
            self._func_other_arg_json_datatypes.append(argument.get_data_type())

    if sequence_input_by_list:
        self._func_other_arg_sql_names.append("SequenceInputBy")
        sequence_input_by_arg_value = UtilFuncs._teradata_collapse_arglist(sequence_input_by_list, "'")
        self._func_other_args.append(sequence_input_by_arg_value)
        self._func_other_arg_json_datatypes.append("STRING")
        self._sql_specific_attributes["SequenceInputBy"] = sequence_input_by_arg_value
493
def _create_dynamic_class(self):
    """
    DESCRIPTION:
        Function dynamically creates a class with name as analytic function name
        and returns an instance of it. The instance exposes the output
        DataFrames (from "_dyn_cls_data_members"), plus "show_query()" and
        "get_build_time()" accessors.

    RETURNS:
        class

    RAISES:
        None.

    EXAMPLE:
        self._create_dynamic_class()
    """
    # Constructor for the dynamic class.
    # NOTE(review): 'constructor' is defined but never wired into the
    # dynamic class below — appears unused.
    def constructor(self):
        """ Constructor for dynamic class """
        # Do Nothing...
        pass

    # Captured in a local so print_result() closes over the map snapshot.
    _function_output_table_map = self._function_output_table_map
    # __repr__ method for dynamic class.
    # Note that the self represents the dynamic class object. Not the
    # instance of _AnlyticFunctionExecutor. So, DataFrames will be available as
    # attributes of the object, which is created using dynamic class.
    def print_result(self):
        """ Function to be used for representation of InDB function type object. """
        repr_string = ""
        for key in _function_output_table_map:
            repr_string = "{}\n############ {} Output ############".format(repr_string, key)
            repr_string = "{}\n\n{}\n\n".format(repr_string, getattr(self,key))
        return repr_string
    self._dyn_cls_data_members["__repr__"] = print_result

    # Freeze query and build time into locals so the lambdas below do not
    # hold a reference to this executor instance.
    query = self.sqlmr_query
    build_time = None if self.__build_time is None else round(self.__build_time, 2)

    self._dyn_cls_data_members["show_query"] = lambda x: query
    self._dyn_cls_data_members["get_build_time"] = lambda x: build_time

    # To list attributes using dict()
    self._dyn_cls_data_members["__dict__"] = self._dyn_cls_data_members
    self._dyn_cls_data_members["_mlresults"] = self._mlresults

    # Dynamic class creation with In-DB function name.
    indb_class = type(self.func_name, (object,), self._dyn_cls_data_members)

    return indb_class()
542
+ def _generate_query(self):
543
+ """
544
+ DESCRIPTION:
545
+ An interface, which should be implemented by child class(es) to generate the
546
+ query for analytic function.
547
+
548
+ RETURNS:
549
+ None
550
+
551
+ RAISES:
552
+ None.
553
+
554
+ EXAMPLE:
555
+ self._generate_query()
556
+ """
557
+ raise NotImplementedError("Function should be implemented in child class.")
558
+
559
+ def _process_input_argument(self, **kwargs):
560
+ """
561
+ DESCRIPTION:
562
+ An interface, which should be implemented by child class(es) to
563
+ process input argument(s).
564
+
565
+ PARAMETERS:
566
+ kwargs:
567
+ Specifies the keyword arguments passed to a function.
568
+
569
+ RETURNS:
570
+ None.
571
+
572
+ RAISES:
573
+ None.
574
+
575
+ EXAMPLES:
576
+ self._process_input_argument()
577
+ """
578
+ raise NotImplementedError("Function should be implemented in child class.")
579
+
580
+ def _process_function_output(self, **kwargs):
581
+ """
582
+ DESCRIPTION:
583
+ An interface, which should be implemented by child class(es) to
584
+ process the output.
585
+
586
+ PARAMETERS:
587
+ kwargs:
588
+ Specifies the keyword arguments passed to a function.
589
+
590
+ RETURNS:
591
+ None.
592
+
593
+ RAISES:
594
+ None.
595
+
596
+ EXAMPLES:
597
+ self._process_function_output()
598
+ """
599
+ raise NotImplementedError("Function should be implemented in child class.")
600
+
601
def _execute_function(self,
                      skip_input_arg_processing=False,
                      skip_output_arg_processing=False,
                      skip_other_arg_processing=False,
                      skip_func_output_processing=False,
                      skip_dyn_cls_processing=False,
                      **kwargs):
    """
    DESCRIPTION:
        Function processes arguments and executes the analytic function.
        Orchestration order: validate persist/volatile -> process input,
        output and other arguments -> generate query -> execute it ->
        process function output -> build and return the dynamic result object.

    PARAMETERS:
        skip_input_arg_processing:
            Optional Argument.
            Specifies whether to skip input (data) argument processing or not.
            Default is to process the input (data) argument.
            When set to True, caller should make sure to process "input" argument and
            pass SQL argument and values as part of kwargs to this function.
            Default Value: False
            Types: bool

        skip_output_arg_processing:
            Optional Argument.
            Specifies whether to skip output argument processing or not.
            Default is to process the output arguments.
            When set to True, caller should make sure to process all output arguments and
            pass equivalent SQL argument and values as part of kwargs to this function.
            Default Value: False
            Types: bool

        skip_other_arg_processing:
            Optional Argument.
            Specifies whether to skip other argument processing or not.
            Default is to process the other arguments, i.e., kwargs.
            When set to True, caller should make sure to process all other arguments are
            processed internally by the function.
            Default Value: False
            Types: bool

        skip_func_output_processing:
            Optional Argument.
            Specifies whether to skip function output processing or not.
            Default is to process the same.
            When set to True, caller should make sure to process function output.
            Generally, when this argument is set to True, one must also
            set "skip_dyn_cls_processing" to True.
            Default Value: False
            Types: bool

        skip_dyn_cls_processing:
            Optional Argument.
            Specifies whether to skip dynamic class processing or not.
            Default is to process the dynamic class, where it creates a dynamic
            class and an instance of the same and returns the same.
            When set to True, caller should make sure to process dynamic class and
            return an instance of the same.
            Default Value: False
            Types: bool

        kwargs:
            Specifies the keyword arguments passed to a function.

    RETURNS:
        An object of class, with the name same as analytic function.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        self._execute_function(arg1="string", arg2="db", arg3=2)
    """
    # kwargs may contain all additional arguments in 'generic_arguments'.
    # Hence update it to actual kwargs.
    kwargs.update(kwargs.pop("generic_arguments", {}))

    start_time = time.time()
    persist = kwargs.get("persist", False)
    volatile = kwargs.get("volatile", False)

    # Validate persist/volatile argument type and values.
    arg_info_matrix = [["persist", persist, True, bool], ["volatile", volatile, True, bool]]
    # Check for valid types and values.
    _Validators._validate_function_arguments(arg_info_matrix)

    # persist and volatile are mutually exclusive.
    if persist and volatile:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH, "persist", "volatile"),
            MessageCodes.CANNOT_USE_TOGETHER_WITH)

    # Add all arguments to dynamic class as data members.
    self._dyn_cls_data_members.update(kwargs)

    # If function produces output tables, i.e., function has output table arguments,
    # then 'db_object_type' should be "table" or if analytic function does not support
    # reading from a view created on output, then 'db_object_type' should be "table".
    # If result is to be persisted or if the table is a volatile table then, db_object_type
    # should be "table" else it should be "view".
    self.db_object_type = (
        TeradataConstants.TERADATA_VOLATILE_TABLE if volatile
        else TeradataConstants.TERADATA_TABLE if len(self._metadata.output_tables) > 0 \
        or not self._metadata._is_view_supported or persist
        else TeradataConstants.TERADATA_VIEW
    )
    if not skip_input_arg_processing:
        self._process_input_argument(**kwargs)

    # check func_name is GLM and data_partition_column, data_hash_column, local_order_data are passed
    if self.func_name in ['GLM', 'TDGLMPredict'] and \
        any(key in kwargs for key in ['data_partition_column', 'data_hash_column', 'local_order_data']):
        skip_output_arg_processing = True

    if not skip_output_arg_processing:
        self._process_output_argument(**kwargs)

    if not skip_other_arg_processing:
        self._process_other_argument(**kwargs)

    # NOTE(review): base _generate_query() takes no 'volatile' parameter;
    # this call relies on child classes overriding it with that keyword —
    # confirm all executors do.
    self._generate_query(volatile=volatile)

    # Print SQL-MR query if requested to do so.
    if display.print_sqlmr_query:
        print(self.sqlmr_query)

    self._execute_query(persist, volatile)

    if not skip_func_output_processing:
        self._process_function_output(**kwargs)

    # Set the build time (seconds elapsed for the whole execution).
    self.__build_time = time.time() - start_time

    if not skip_dyn_cls_processing:
        return self._create_dynamic_class()
736
def _quote_collapse_other_args(self, argument, arg_value):
    """
    DESCRIPTION:
        Given a list as an argument this will single quote all the
        list elements and combine them into a single string separated by
        commas.
        Single quotes are added only for the element kinds that SQL
        requires quoted (columns, strings, booleans).

    PARAMETERS:
        argument:
            Required Argument.
            Specifies the argument object (_AnlyArgumentBase).
            Types: _AnlyFuncArgument

        arg_value:
            Required Argument.
            Specifies the arg_value to be quoted and combined.
            Types: list OR string OR int OR bool OR float

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        self._quote_collapse_other_args(argument, arg_value)
    """
    data_type = argument.get_data_type()
    # When the metadata allows multiple datatypes, decide quoting from the
    # actual Python value; otherwise decide from the declared SQL datatype.
    if isinstance(data_type, list):
        quote_needed = isinstance(arg_value, (str, bool))
    else:
        quote_needed = data_type.lower() in ("column", "columns", "column_names", "string", "boolean")

    return UtilFuncs._teradata_collapse_arglist(arg_value, "'" if quote_needed else "")
775
class _SQLEFunctionExecutor(_AnlyticFunctionExecutor):
    """ Class to hold the attributes and provide methods to enable function execution. """
    def __init__(self, func_name, func_type=TeradataAnalyticFunctionTypes.SQLE.value):
        """
        DESCRIPTION:
            Constructor for the class.

        PARAMETERS:
            func_name:
                Required Argument.
                Specifies the name of the analytic function, which is exposed to user.
                Types: str

            func_type:
                Optional Argument.
                Specifies the type of the analytic function.
                Types: str

        RAISES:
            TypeError OR ValueError OR TeradataMlException

        EXAMPLES:
            _SQLEFunctionExecutor("AdaBoost")
        """
        super().__init__(func_name, func_type)

        # Lists to hold Input Argument (Table) Information.
        # These lists run parallel to each other: index i of every list
        # describes the i-th input table of the SQLE function, and they are
        # consumed together by AnalyticQueryGenerator in _generate_query().
        self._func_input_arg_sql_names = []
        self._func_input_table_view_query = []
        self._func_input_dataframe_type = []
        self._func_input_distribution = []
        self._func_input_partition_by_cols = []
        self._func_input_order_by_cols = []
        self._func_input_local_order = []
810
def _generate_query(self, volatile=False):
    """
    DESCRIPTION:
        Function to generate the SQL query for SQLE analytic function.
        Feeds the input/other/output argument lists collected by the
        _process_*_argument() methods into AnalyticQueryGenerator and
        stores the resulting SELECT statement in "self.sqlmr_query".

    PARAMETERS:
        volatile:
            Optional Argument.
            Specifies whether to create a volatile table or not.
            Default Value: False
            Type: bool

    RETURNS:
        None.

    RAISES:
        None.

    EXAMPLES:
        self._generate_query()
    """

    self.__aqg_obj = AnalyticQueryGenerator(function_name=self._metadata.sql_function_name,
                                            func_input_arg_sql_names=self._func_input_arg_sql_names,
                                            func_input_table_view_query=self._func_input_table_view_query,
                                            func_input_dataframe_type=self._func_input_dataframe_type,
                                            func_input_distribution=self._func_input_distribution,
                                            func_input_partition_by_cols=self._func_input_partition_by_cols,
                                            func_input_order_by_cols=self._func_input_order_by_cols,
                                            func_other_arg_sql_names=self._func_other_arg_sql_names,
                                            func_other_args_values=self._func_other_args,
                                            func_other_arg_json_datatypes=self._func_other_arg_json_datatypes,
                                            func_output_args_sql_names=self._func_output_args_sql_names,
                                            func_output_args_values=self._func_output_args,
                                            engine="ENGINE_SQL",
                                            volatile_output=volatile,
                                            skip_config_lookup=True,
                                            func_input_local_order=self._func_input_local_order)

    # Invoke call to SQL-MR generation.
    self.sqlmr_query = self.__aqg_obj._gen_sqlmr_select_stmt_sql()
852
+ def _get_input_args(self, **kwargs):
853
+ """
854
+ DESCRIPTION:
855
+ Function to get input argument(s).
856
+
857
+ PARAMETERS:
858
+ kwargs:
859
+ Specifies the keyword arguments passed to a function.
860
+
861
+ RETURNS:
862
+ tuple, element1 represents input DataFrame argument name and
863
+ second element represents the Input Argument metadata.
864
+
865
+ RAISES:
866
+ None.
867
+
868
+ EXAMPLES:
869
+ self._get_input_args()
870
+ """
871
+ sort_order = list(kwargs.keys())
872
+ input_table_dict = {}
873
+
874
+ for _inp_attribute in self._metadata.input_tables:
875
+ input_table_arg = _inp_attribute.get_lang_name()
876
+
877
+ # Store the first argument directly into the dictionary
878
+ input_table_dict[input_table_arg] = _inp_attribute
879
+
880
+ # Check if SQL function allows multiple values as input.
881
+ if _inp_attribute.allows_lists():
882
+ _index = 1
883
+ while True:
884
+ _input_table_arg = "{}{}".format(input_table_arg, _index)
885
+ if _input_table_arg in kwargs:
886
+ input_table_dict[_input_table_arg] = _inp_attribute
887
+ _index += 1
888
+ else:
889
+ break
890
+
891
+ # For ColumnTransformer, yield the input arguments in the order they are passed.
892
+ if self.func_name == "ColumnTransformer":
893
+ for key in sort_order:
894
+ if key in input_table_dict:
895
+ yield key, input_table_dict[key]
896
+ else:
897
+ for key in input_table_dict:
898
+ yield key, input_table_dict[key]
899
+
900
    def _process_input_argument(self, **kwargs):
        """
        DESCRIPTION:
            Function to process input argument(s).
            For every input table argument yielded by self._get_input_args(),
            validates the table and its companion arguments
            ("<arg>_partition_column", "<arg>_order_column",
            "local_order_<arg>", "<arg>_hash_column") and appends the derived
            values to the internal lists consumed later by query generation.

        PARAMETERS:
            kwargs:
                Specifies the keyword arguments passed to a function.

        RETURNS:
            None.

        RAISES:
            None.

        EXAMPLES:
            self._process_input_argument()
        """
        for input_table_arg, input_attribute in self._get_input_args(**kwargs):
            # Derive the names of the companion arguments for this input table.
            partition_column_arg = "{}_partition_column".format(input_table_arg)
            order_column_arg = "{}_order_column".format(input_table_arg)
            local_order_column_arg = "local_order_{}".format(input_table_arg)
            hash_column_arg = "{}_hash_column".format(input_table_arg)

            # Get the argument values from kwargs.
            input_table_arg_value = kwargs.get(input_table_arg)
            partition_column_arg_value = kwargs.get(partition_column_arg)
            order_column_arg_value = kwargs.get(order_column_arg)
            local_order_column_arg_value = kwargs.get(local_order_column_arg, False)
            hash_column_arg_value = kwargs.get(hash_column_arg)

            # Some functions accept the output object of another analytic
            # function directly as input; fetch that acceptable class, if any.
            reference_class = None
            if input_attribute.is_reference_function_acceptable():
                reference_class = self._metadata.get_reference_function_class()

            # Validate the input table arguments.
            self._validate_analytic_function_argument(
                input_table_arg, input_table_arg_value, input_attribute, additional_valid_types=reference_class)

            # If input is an object of reference Function, then get the DataFrame from it.
            if reference_class and isinstance(input_table_arg_value, reference_class):
                input_table_arg_value = input_table_arg_value._mlresults[0]
            # Don't fill the input lists if the value is None.
            if input_table_arg_value is None:
                continue

            # Validate local_order_column argument type and values.
            arg_info_matrix = [[local_order_column_arg, local_order_column_arg_value, True, bool, True]]
            # Check emptiness and types.
            _Validators._validate_missing_required_arguments(arg_info_matrix)
            _Validators._validate_function_arguments(arg_info_matrix)

            for arg in [partition_column_arg, order_column_arg, hash_column_arg]:
                arg_value = kwargs.get(arg)

                expected_types = (str, list)
                # For partition column, user can pass partition kind too.
                if "partition" in arg:
                    expected_types = (str, _ListOf(str), PartitionKind)
                arg_info_matrix = [[arg, arg_value, True, expected_types, True]]

                # Check for empty string and datatype.
                _Validators._validate_missing_required_arguments(arg_info_matrix)
                _Validators._validate_function_arguments(arg_info_matrix)

                # Column existence can only be checked for real column names;
                # a PartitionKind value is a symbolic partition directive.
                if not isinstance(arg_value, PartitionKind):
                    # Validate column existence in DataFrame only if user inputs a column(s).
                    _Validators._validate_dataframe_has_argument_columns(arg_value,
                                                                         arg,
                                                                         input_table_arg_value,
                                                                         input_table_arg
                                                                         )

            # Double-quote the ORDER BY column name(s) for SQL generation.
            order_column_arg_value = UtilFuncs._teradata_collapse_arglist(order_column_arg_value, "\"")

            distribution, partition_column = self._get_distribution_and_partition_column(
                partition_column_arg_value, hash_column_arg_value, input_attribute)

            # Resolve the DataFrame to a table/view reference usable in the ON clause.
            table_ref = AnalyticsWrapperUtils()._teradata_on_clause_from_dataframe(
                input_table_arg_value, False)

            # Record everything needed by the query generator, one entry per input.
            self._func_input_arg_sql_names.append(input_attribute.get_sql_name())
            self._func_input_table_view_query.append(table_ref["ref"])
            self._func_input_dataframe_type.append(table_ref["ref_type"])
            self._func_input_order_by_cols.append(order_column_arg_value)
            self._func_input_distribution.append(distribution)
            self._func_input_partition_by_cols.append(partition_column)
            self._func_input_local_order.append(local_order_column_arg_value)
989
+
990
+ def _get_distribution_and_partition_column(self,
991
+ partition_column_arg_value,
992
+ hash_column_arg_value,
993
+ input_attribute):
994
+ """
995
+ DESCRIPTION:
996
+ Function to get the input distribution and partition column values to
997
+ process input table argument.
998
+
999
+ PARAMETERS:
1000
+ partition_column_arg_value:
1001
+ Required Argument.
1002
+ Specifies the partition column argument value.
1003
+ Types: str OR PartitionKind OR None.
1004
+
1005
+ hash_column_arg_value:
1006
+ Required Argument.
1007
+ Specifies the hash column argument value.
1008
+ Types: str
1009
+
1010
+ input_attribute:
1011
+ Required Argument.
1012
+ Specifies the input table attribute.
1013
+ Types: _AnlyFuncInput
1014
+
1015
+ RETURNS:
1016
+ tuple, with first element represents distribution and second element
1017
+ represents partition_column.
1018
+
1019
+ RAISES:
1020
+ None.
1021
+
1022
+ EXAMPLES:
1023
+ self._get_distribution_and_partition_column(partition_column_arg_val, hash_column_arg_val)
1024
+ """
1025
+ # If user passes hash_column_argument, generate the Query based on HASH BY
1026
+ # irrespective of the value of partition_column.
1027
+ if hash_column_arg_value:
1028
+ return "HASH", UtilFuncs._teradata_collapse_arglist(hash_column_arg_value, "\"")
1029
+
1030
+ # If user passes PartitionKind, generate Query based on distribution and partition type.
1031
+ if isinstance(partition_column_arg_value, PartitionKind):
1032
+ return self.__get_dist_partition_column_from_partition_kind(partition_column_arg_value)
1033
+
1034
+ # If user pass a string or list of strings for partition_column, generate PARTITION BY
1035
+ # based on partition column value.
1036
+ if partition_column_arg_value is not None:
1037
+ return "FACT", UtilFuncs._teradata_collapse_arglist(partition_column_arg_value, "\"")
1038
+ # No partition_column is sourced to input. So, derive the default one.
1039
+ else:
1040
+ default = input_attribute._get_default_partition_column_kind()
1041
+ return self.__get_dist_partition_column_from_partition_kind(default)
1042
+
1043
+ def __get_dist_partition_column_from_partition_kind(self, partition_kind):
1044
+ """
1045
+ DESCRIPTION:
1046
+ Function to get the distribution and partition column based on PartitionKind.
1047
+
1048
+ PARAMETERS:
1049
+ partition_kind:
1050
+ Required Argument.
1051
+ Specifies the type of Partition.
1052
+ Types: PartitionKind
1053
+
1054
+ RETURNS:
1055
+ tuple, with first element represents distribution and second element
1056
+ represents partition_column.
1057
+
1058
+ RAISES:
1059
+ None.
1060
+
1061
+ EXAMPLES:
1062
+ self.__get_dist_partition_column_from_partition_kind(PartitionKind.ONE)
1063
+ """
1064
+ if partition_kind in (PartitionKind.ANY, PartitionKind.ONE):
1065
+ return "FACT", partition_kind.value
1066
+ elif partition_kind == PartitionKind.DIMENSION:
1067
+ return PartitionKind.DIMENSION.value, None
1068
+ # Else is for PartitionKind.NONE.
1069
+ else:
1070
+ return "NONE", "NA_character_"
1071
+
1072
+ # Below code is not being used. Kept here for future reference.
1073
+ '''
1074
+ def _get_input_distribution_and_partition_column(self, input_table, partition_column_arg_value):
1075
+ """
1076
+ DESCRIPTION:
1077
+ Function to get the input distribution and partition column values to
1078
+ process input table argument.
1079
+
1080
+ PARAMETERS:
1081
+ input_table:
1082
+ Required Argument.
1083
+ Specifies the input table argument.
1084
+ Types: _AnlyFuncInput
1085
+
1086
+ partition_column_arg_value:
1087
+ Required Argument.
1088
+ Specifies the partition column argument value.
1089
+ Types: str
1090
+
1091
+ RETURNS:
1092
+ tuple, with first element represents distribution and second element
1093
+ represents partition_column.
1094
+
1095
+ RAISES:
1096
+ None.
1097
+
1098
+ EXAMPLES:
1099
+ self._get_input_distribution_and_partition_column(inp1, partition_column_arg)
1100
+ """
1101
+ # Initialise all the temporary variables and set those to False by default.
1102
+ is_dimension, is_partition_by_key, is_partition_by_any, is_partition_by_one = [False] * 4
1103
+ is_partition_by_one_only, is_partition_by_any_only = [False] * 2
1104
+
1105
+ # Get the partition kind from input table.
1106
+ partition_kind = input_table._get_partition_column_required_kind()
1107
+
1108
+ # Check whether associated input table requires to be partitioned
1109
+ # on any column or not.
1110
+ # Set some booleans based on what type of distribution is supported by
1111
+ # the argument.
1112
+ if partition_kind == PartitionKind.DIMENSION:
1113
+ is_dimension = True
1114
+ elif partition_kind == PartitionKind.DIMENSIONKEY:
1115
+ is_dimension, is_partition_by_key = True, True
1116
+ elif partition_kind == PartitionKind.DIMENSIONKEYANY:
1117
+ is_dimension, is_partition_by_any, is_partition_by_key = True, True, True
1118
+ elif partition_kind == PartitionKind.KEY:
1119
+ is_partition_by_key = True
1120
+ elif partition_kind == PartitionKind.ONE:
1121
+ is_partition_by_one, is_partition_by_key = True, True
1122
+ elif partition_kind == PartitionKind.ANY:
1123
+ is_partition_by_any, is_partition_by_key = True, True
1124
+ elif partition_kind == PartitionKind.ANYONLY:
1125
+ is_partition_by_any_only = True
1126
+ elif partition_kind == PartitionKind.ONEONLY:
1127
+ is_partition_by_one_only = True
1128
+
1129
+ collapse_arg_list = lambda partition_column_arg_value: "NA_character_" if partition_column_arg_value is None\
1130
+ else UtilFuncs._teradata_collapse_arglist(partition_column_arg_value, "\"")
1131
+
1132
+ default_partition_value = input_table._get_default_partition_by_value(partition_kind)
1133
+
1134
+ # When distribution is of type dimension, no partition by column required.
1135
+ if is_dimension and not is_partition_by_key and not is_partition_by_any:
1136
+ distribution = "DIMENSION"
1137
+ partition_column = "NA_character_"
1138
+ # When partitioned by either key or any, distribution should be FACT.
1139
+ elif is_dimension and (is_partition_by_key or is_partition_by_any):
1140
+ # If the input is not None, then distribution should be FACT. Otherwise, DIMENSION.
1141
+ distribution = "DIMENSION"
1142
+ if (partition_column_arg_value is not None and is_partition_by_key):
1143
+ distribution = "FACT"
1144
+
1145
+ # Quote if input value is not same as default value.
1146
+ if self._awu._is_default_or_not(partition_column_arg_value, default_partition_value):
1147
+ partition_column = collapse_arg_list(partition_column_arg_value)
1148
+ else:
1149
+ partition_column = default_partition_value
1150
+
1151
+ elif partition_column_arg_value is not None and not is_partition_by_key and is_partition_by_any:
1152
+ distribution = "FACT"
1153
+ partition_column = "ANY"
1154
+ else:
1155
+ partition_column = "NA_character_"
1156
+ else:
1157
+ # When partitioned by either key or any, distribution should be FACT.
1158
+ if is_partition_by_any and not is_partition_by_key:
1159
+ distribution = "FACT"
1160
+ partition_column = "ANY"
1161
+ elif (is_partition_by_key and not is_partition_by_any and not is_partition_by_one) or\
1162
+ (is_partition_by_key and is_partition_by_any):
1163
+ distribution = "FACT"
1164
+ # If partition value is default value, Enclose it with double quotes.
1165
+ if default_partition_value is not None or default_partition_value != "":
1166
+ if self._awu._is_default_or_not(partition_column_arg_value, default_partition_value):
1167
+ partition_column = collapse_arg_list(partition_column_arg_value)
1168
+ else:
1169
+ partition_column = default_partition_value
1170
+ else:
1171
+ partition_column = UtilFuncs._teradata_collapse_arglist(partition_column_arg_value, "\"")
1172
+ elif is_partition_by_one:
1173
+ distribution = "FACT"
1174
+ # If partition value is 1, Enclose it with double quotes.
1175
+ if self._awu._is_default_or_not(partition_column_arg_value, "1"):
1176
+ partition_column = collapse_arg_list(partition_column_arg_value)
1177
+ else:
1178
+ partition_column = default_partition_value
1179
+ elif is_partition_by_any_only or is_partition_by_one_only:
1180
+ distribution = "FACT"
1181
+ partition_column = "{}".format(default_partition_value)
1182
+ else:
1183
+ distribution = "NONE"
1184
+ partition_column = "NA_character_"
1185
+
1186
+ return distribution, partition_column
1187
+ '''
1188
+
1189
+ def _process_function_output(self, **kwargs):
1190
+ """
1191
+ DESCRIPTION:
1192
+ Internal function to process the output tables. This function creates
1193
+ the required output DataFrames from the tables and a result list.
1194
+
1195
+ PARAMETERS:
1196
+ kwargs:
1197
+ Specifies the keyword arguments passed to a function.
1198
+
1199
+ RETURNS:
1200
+ None.
1201
+
1202
+ RAISES:
1203
+ None.
1204
+
1205
+ EXAMPLES:
1206
+ self._process_function_output()
1207
+ """
1208
+ for lang_name, table_name in self._function_output_table_map.items():
1209
+ out_table_name = UtilFuncs._extract_table_name(table_name)
1210
+ out_db_name = UtilFuncs._extract_db_name(table_name)
1211
+ df = self._awu._create_data_set_object(
1212
+ df_input=out_table_name, database_name=out_db_name, source_type="table")
1213
+ self._dyn_cls_data_members[lang_name] = df
1214
+ # Condition make sure that the first element always be result or output in _mlresults.
1215
+ if lang_name in ["output", "result"]:
1216
+ self._mlresults.insert(0, df)
1217
+ else:
1218
+ self._mlresults.append(df)
1219
+
1220
class _TableOperatorExecutor(_SQLEFunctionExecutor):
    """ Class to hold the attributes and provide methods to enable execution for Table Operators. """
    def __init__(self, func_name):
        """
        DESCRIPTION:
            Constructor for the class.

        PARAMETERS:
            func_name:
                Required Argument.
                Specifies the name of the analytic function, which is exposed to the user.
                Types: str

        RAISES:
            TypeError OR ValueError OR TeradataMlException

        EXAMPLES:
            _TableOperatorExecutor("write_nos")
        """
        super().__init__(func_name, TeradataAnalyticFunctionTypes.TABLEOPERATOR.value)

        # Lists to hold Input Argument (Table) Information
        # Per-input ORDER BY type ("LOCAL" or None); populated by
        # _process_input_argument, one entry per processed input.
        self.__func_input_order_by_type = []
        # NOTE(review): the two lists below are initialised and forwarded to
        # the query generator but never appended to in this module -
        # presumably consumed with their empty defaults; confirm.
        self.__func_input_sort_ascending = []
        self.__func_input_nulls_first = []
1245
+
1246
+ def _generate_query(self, **kwargs):
1247
+ """
1248
+ DESCRIPTION:
1249
+ Function to generate the SQL query for TABLE OPERATOR function.
1250
+
1251
+ RETURNS:
1252
+ None.
1253
+
1254
+ RAISES:
1255
+ None.
1256
+
1257
+ EXAMPLES:
1258
+ self._generate_query()
1259
+ """
1260
+ self.__aqg_obj = TableOperatorQueryGenerator(function_name=self.func_name,
1261
+ func_input_arg_sql_names=self._func_input_arg_sql_names,
1262
+ func_input_table_view_query=self._func_input_table_view_query,
1263
+ func_input_dataframe_type=self._func_input_dataframe_type,
1264
+ func_input_distribution=self._func_input_distribution,
1265
+ func_input_partition_by_cols=self._func_input_partition_by_cols,
1266
+ func_input_order_by_cols=self._func_input_order_by_cols,
1267
+ func_other_arg_sql_names=self._func_other_arg_sql_names,
1268
+ func_other_args_values=self._func_other_args,
1269
+ func_other_arg_json_datatypes=self._func_other_arg_json_datatypes,
1270
+ func_output_args_sql_names=self._func_output_args_sql_names,
1271
+ func_output_args_values=self._func_output_args,
1272
+ func_input_order_by_type=self.__func_input_order_by_type,
1273
+ func_input_sort_ascending=self.__func_input_sort_ascending,
1274
+ func_input_nulls_first=self.__func_input_nulls_first,
1275
+ engine="ENGINE_SQL")
1276
+
1277
+ # Invoke call to SQL-MR generation.
1278
+ self.sqlmr_query = self.__aqg_obj._gen_table_operator_select_stmt_sql()
1279
+
1280
    def _process_input_argument(self, **kwargs):
        """
        DESCRIPTION:
            Function to process input argument(s).
            Runs the generic SQLE input processing first, then derives the
            table-operator specific ORDER BY type, HASH BY distribution and
            LOCAL ORDER settings for every input table.

        PARAMETERS:
            kwargs:
                Specifies the keyword arguments passed to a function.

        RETURNS:
            None.

        RAISES:
            None.

        EXAMPLES:
            self._process_input_argument()
        """
        super()._process_input_argument(**kwargs)
        # Iterating over multiple input arguments if present.
        for index, input_attribute in enumerate(self._metadata.input_tables):
            # Extracting input argument name and value.
            input_table_arg = input_attribute.get_lang_name()
            input_table_arg_value = kwargs.get(input_table_arg)
            # No need to process further if no input argument.
            # Validation of this input is done in the parent class.
            if input_table_arg_value is None:
                continue

            # Extracting argument names for partition, hash and is local ordered.
            partition_column_arg = "{}_partition_column".format(input_table_arg)
            hash_column_arg = "{}_hash_column".format(input_table_arg)
            is_local_ordered_arg = "local_order_{}".format(input_table_arg)
            order_column_arg = "{}_order_column".format(input_table_arg)
            # Extracting argument values for partition, hash and is local ordered.
            partition_column_value = kwargs.get(partition_column_arg, None)
            hash_column_value = kwargs.get(hash_column_arg, None)
            is_local_ordered_value = kwargs.get(is_local_ordered_arg, False)
            order_column_value = kwargs.get(order_column_arg, "NA_character_")

            self._validate_hash_local_ordered_arguments(partition_column_arg, partition_column_value,
                                                        hash_column_arg, hash_column_value,
                                                        is_local_ordered_arg, is_local_ordered_value,
                                                        order_column_arg, order_column_value,
                                                        input_table_arg, input_table_arg_value)

            # NOTE(review): "index" enumerates the metadata input tables while
            # the parent populates _func_input_distribution et al. only for
            # non-None inputs - assumes positions stay aligned; confirm for
            # functions with optional input tables.
            if is_local_ordered_value:
                # LOCAL ORDER BY requested: distribute by HASH when a hash
                # column is given, otherwise emit no distribution clause.
                self.__func_input_order_by_type.append("LOCAL")
                if hash_column_value is None:
                    self._func_input_distribution[index] = "NONE"
                else:
                    self._func_input_distribution[index] = "HASH"
                    self._func_input_partition_by_cols[index] = hash_column_value
            else:
                self.__func_input_order_by_type.append(None)
                if partition_column_value is None:
                    self._func_input_distribution[index] = "NONE"
1337
+
1338
    def _validate_hash_local_ordered_arguments(self, partition_column_arg, partition_column_value,
                                               hash_column_arg, hash_column_value,
                                               is_local_ordered_arg, is_local_ordered_value,
                                               order_column_arg, order_column_value,
                                               input_table_arg, input_table_arg_value):
        """
        DESCRIPTION:
            Function to validate the hash and local order function arguments. This function does
            the following validations
            * Check if Hash Column value is not empty string.
            * Check if "is local order" value is of type boolean.
            * Hash and order by can be used together as long as is_local_order = True.
            * Either hash or partition can be used.
            * Either local order by or partition by can be used.

        PARAMETERS:
            partition_column_arg:
                Required Argument.
                Specifies the name of the partition by column argument.
                Type: str

            partition_column_value:
                Required Argument.
                Specifies the value of the partition by column argument.
                Type: str

            hash_column_arg:
                Required Argument.
                Specifies the name of the hash by column argument.
                Type: str

            hash_column_value:
                Required Argument.
                Specifies the value of the hash by column argument.
                Type: str

            is_local_ordered_arg:
                Required Argument.
                Specifies the name of the is local ordered argument.
                Type: str

            is_local_ordered_value:
                Required Argument.
                Specifies the value of the is local ordered argument.
                Type: bool

            order_column_arg:
                Required Argument.
                Specifies the name of the order by column argument.
                Type: str

            order_column_value:
                Required Argument.
                Specifies the value of the order by column argument.
                Type: str

            input_table_arg:
                Required Argument.
                Specifies the name of the input table provided to the function.
                Types: str

            input_table_arg_value:
                Required Argument.
                Specifies the value of the input table provided to the function.
                Types: DataFrame

        RETURNS:
            None

        RAISES:
            TeradataMlException

        EXAMPLES:
            self._validate_hash_local_ordered_arguments("data", DataFrame.from_table("ibm_stock")), **kwargs)
        """
        # Check for empty string and types(str or list) for hash column values.
        # Check for types for is local ordered values.

        _Validators._validate_function_arguments([[hash_column_arg, hash_column_value, True, (str, list), True],
                                                  [is_local_ordered_arg, is_local_ordered_value, True, bool, False]])

        # Validate column existence in DataFrame.
        _Validators._validate_dataframe_has_argument_columns(hash_column_value,
                                                             hash_column_arg,
                                                             input_table_arg_value,
                                                             input_table_arg
                                                             )

        # Hash and order by can be used together as long as is_local_order = True.
        if all([hash_column_value,
                order_column_value]) and not is_local_ordered_value:
            raise TeradataMlException(
                Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH,
                                     "{}' and '{}".format(hash_column_arg, order_column_arg),
                                     "{}=False".format(is_local_ordered_arg)),
                MessageCodes.CANNOT_USE_TOGETHER_WITH)

        # Either hash or partition can be used.
        _Validators._validate_mutually_exclusive_arguments(hash_column_value,
                                                           hash_column_arg,
                                                           partition_column_value,
                                                           partition_column_arg,
                                                           skip_all_none_check=True)

        # Either local order by or partition by can be used.
        _Validators._validate_mutually_exclusive_arguments(is_local_ordered_value,
                                                           is_local_ordered_arg,
                                                           partition_column_value,
                                                           partition_column_arg,
                                                           skip_all_none_check=True)

        # local order by requires column name.
        if is_local_ordered_value and order_column_value is None:
            message = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING,
                                           order_column_arg, "{}=True".format(is_local_ordered_arg))
            raise TeradataMlException(message, MessageCodes.DEPENDENT_ARG_MISSING)
1454
+
1455
+ def _quote_collapse_other_args(self, argument, arg_value):
1456
+ """
1457
+ DESCRIPTION:
1458
+ Given a list as an argument this will single quote all the
1459
+ list elements and combine them into a single string separated by
1460
+ commas.
1461
+ Append single quote to the elements which are required to be quoted.
1462
+
1463
+ PARAMETERS:
1464
+ argument:
1465
+ Required Argument.
1466
+ Specifies the argument object (_AnlyArgumentBase).
1467
+ Types: _AnlyFuncArgument
1468
+
1469
+ arg_value:
1470
+ Required Argument.
1471
+ Specifies the arg_value to be quoted and combined.
1472
+ Types: list OR string OR int OR bool OR float
1473
+
1474
+ RETURNS:
1475
+ None
1476
+
1477
+ RAISES:
1478
+ None
1479
+
1480
+ EXAMPLES:
1481
+ self._quote_collapse_other_args(argument, arg_value)
1482
+ """
1483
+ arg_dtype = UtilFuncs._as_list(argument.get_data_type())
1484
+ for arg in arg_dtype:
1485
+ if arg.lower() in ("column", "columns", "column_names", "string", "boolean") and isinstance(arg_value,(str, bool)):
1486
+ return UtilFuncs._teradata_collapse_arglist(UtilFuncs._teradata_collapse_arglist(arg_value, "\'"), "'")
1487
+ else:
1488
+ return UtilFuncs._teradata_collapse_arglist(arg_value, "'")
1489
+
1490
class _UAFFunctionExecutor(_SQLEFunctionExecutor):
    """ Class to hold the attributes and provide methods to enable execution for UAF Functions. """
    def __init__(self, func_name):
        """
        DESCRIPTION:
            Constructor for the class.

        PARAMETERS:
            func_name:
                Required Argument.
                Specifies the name of the analytic function, which is exposed to the user.
                Types: str

        RAISES:
            TypeError OR ValueError OR TeradataMlException

        EXAMPLES:
            _UAFFunctionExecutor("ArimaEstimate")
        """
        super().__init__(func_name, TeradataAnalyticFunctionTypes.UAF.value)
        # Dicts holding the "other" arguments and the INPUT_FMT/OUTPUT_FMT
        # arguments; forwarded to UAFQueryGenerator by _generate_query.
        self._func_other_args = {}
        self._func_input_fmt_arguments = {}
        self._func_output_fmt_arguments = {}

        # Lists to hold Input Argument (Table) Information
        # One entry per non-None input, appended by _process_input_argument.
        self._func_input_args = []
        self._func_input_filter_expr_args = []

        # Lists to hold Output Table Information.
        self._func_output_args = None
        self._function_output_table_map = {}
        self._volatile_output = False
1522
+
1523
+ def _generate_query(self, volatile=False):
1524
+ """
1525
+ DESCRIPTION:
1526
+ Function to generate the SQL query for UAF function.
1527
+
1528
+ RETURNS:
1529
+ None.
1530
+
1531
+ RAISES:
1532
+ None.
1533
+
1534
+ EXAMPLES:
1535
+ self._generate_query()
1536
+ """
1537
+ query_generator = UAFQueryGenerator(function_name=self._metadata.sql_function_name,
1538
+ func_input_args=self._func_input_args,
1539
+ func_input_filter_expr_args=self._func_input_filter_expr_args,
1540
+ func_other_args=self._func_other_args ,
1541
+ func_output_args=self._func_output_args,
1542
+ func_input_fmt_args=self._func_input_fmt_arguments,
1543
+ func_output_fmt_args=self._func_output_fmt_arguments,
1544
+ volatile_output=volatile)
1545
+ self.sqlmr_query= query_generator._get_display_uaf()
1546
+
1547
    def _process_input_argument(self, **kwargs):
        """
        DESCRIPTION:
            Function to process input argument(s).
            For every UAF input table declared in the JSON metadata, validates
            the argument and its optional "<arg>_filter_expr" companion and
            appends both to the internal input lists used for query generation.

        PARAMETERS:
            kwargs:
                Specifies the keyword arguments passed to a function.

        RETURNS:
            None.

        RAISES:
            None.

        EXAMPLES:
            self._process_input_argument()
        """
        # Local import of ColumnExpression, used only for type validation of
        # the filter expression arguments.
        from teradataml.dataframe.sql_interfaces import ColumnExpression

        # Process the Input tables.
        # Get the list of input arguments from the JsonStore metadata
        for input_attribute in self._metadata.input_tables:
            # Get the input table arg name.
            input_table_arg = input_attribute.get_lang_name()

            # Get the value of input table arg.
            input_table_arg_value = kwargs.get(input_table_arg, None)
            self._validate_analytic_function_argument(input_table_arg,
                                                      input_table_arg_value,
                                                      input_attribute)

            # Form the 'filter_expr' key name (User provided input).
            filter_exp_arg = "{}_filter_expr".format(input_table_arg)
            # Get the 'filter_expr' value.
            filter_exp_arg_value = kwargs.get(filter_exp_arg, None)

            # If 'filter_expr' is passed and 'data' is None, raise
            # dependent argument exception.
            if filter_exp_arg_value is not None and \
                    input_table_arg_value is None:
                # Raise error, if "data" not provided and "data_filter_expr" is provided.
                err_ = Messages.get_message(MessageCodes.DEPENDENT_ARGUMENT,
                                            filter_exp_arg,
                                            input_table_arg)
                raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARGUMENT)

            # 'filter_expr' argument validation (User provided input).
            arg_info = []
            arg_info.append([filter_exp_arg, filter_exp_arg_value, True,
                             (ColumnExpression), False])

            # Validate 'filter_expr' argument types (User provided input).
            _Validators._validate_function_arguments(arg_info)

            # If data is not None, then add 'data' and 'filter_expr' to lists.
            if input_table_arg_value is not None:
                # Append the lists.
                self._func_input_args.append(input_table_arg_value)
                self._func_input_filter_expr_args.append(filter_exp_arg_value)
1607
+
1608
+ def _process_function_output(self, **kwargs):
1609
+ """
1610
+ DESCRIPTION:
1611
+ Internal function to process the output tables. This function creates
1612
+ the required output DataFrames from the tables and a result list.
1613
+
1614
+ PARAMETERS:
1615
+ None.
1616
+
1617
+ RETURNS:
1618
+ None.
1619
+
1620
+ RAISES:
1621
+ None.
1622
+
1623
+ EXAMPLES:
1624
+ self._process_function_output()
1625
+ """
1626
+ volatile = kwargs.get("volatile", False)
1627
+ persist = kwargs.get("persist", False)
1628
+ output_db_name = kwargs.get("output_db_name")
1629
+
1630
+ # Since the regular function will always refer to latest value, creating
1631
+ # a closure here. The function will go as an attribute to dynamically
1632
+ # created object.
1633
+ def _parent(layer_name, table_name, query=None):
1634
+
1635
+ def _layer(self):
1636
+ if self._data.get(layer_name) is None:
1637
+ from teradataml import DataFrame, in_schema
1638
+ # Execute the Query, create a DataFrame and attach it.
1639
+ if query:
1640
+ UtilFuncs._execute_query(query=query)
1641
+ _db_name, _table_name = UtilFuncs._extract_db_name(table_name), \
1642
+ UtilFuncs._extract_table_name(table_name)
1643
+ _table_name = in_schema(_db_name, _table_name) if _db_name else _table_name
1644
+ self._data[layer_name] = DataFrame.from_table(table_name)
1645
+
1646
+ return self._data[layer_name]
1647
+
1648
+ return _layer
1649
+
1650
+ for output_table in self._metadata.output_tables[1:]:
1651
+ layer_name = output_table.get_layer_name()
1652
+ exposed_layer_name = output_table.get_lang_name()
1653
+
1654
+ # Creating the ART Spec here instead of creating an object of TDSeries to
1655
+ # save additional imports and processing.
1656
+ _art_spec = "ART_SPEC(TABLE_NAME({}), LAYER({}))".format(self._function_output_table_map["result"],
1657
+ layer_name)
1658
+
1659
+ # Generate table name.
1660
+ func_params = self._get_generate_temp_table_params(persist=persist,
1661
+ output_db=output_db_name)
1662
+ _table_name = UtilFuncs._generate_temp_table_name(**func_params)
1663
+
1664
+ # Generate Query.
1665
+ UAF_Query = UAFQueryGenerator(function_name="TD_EXTRACT_RESULTS",
1666
+ func_input_args=_art_spec,
1667
+ func_input_filter_expr_args={},
1668
+ func_other_args={},
1669
+ func_input_fmt_args={},
1670
+ func_output_args=_table_name,
1671
+ volatile_output=volatile,
1672
+ ctas=True)
1673
+
1674
+ query = UAF_Query._get_display_uaf()
1675
+
1676
+ # Store the internal function in a dict. While storing it, convert it to
1677
+ # a property so user do not need to call it.
1678
+ self._dyn_cls_data_members[exposed_layer_name] = property(
1679
+ _parent(exposed_layer_name, _table_name, query))
1680
+
1681
+ # 'result' attribute in UAF Function object should point to output table.
1682
+ self._dyn_cls_data_members["result"] = property(
1683
+ _parent("result", self._function_output_table_map["result"]))
1684
+
1685
+ # To make lazy execution, we will add additional attributes to UAF Function object.
1686
+ # Mask those additional attributes by overwriting the __dir__ method.
1687
+ attrs = list(self._dyn_cls_data_members.keys())
1688
+ self._dyn_cls_data_members["__dir__"] = lambda self: super(self.__class__).__dir__() + attrs
1689
+
1690
+ # Add a variable _data to output object so that the layers DataFrame
1691
+ # will be stored in this variable.
1692
+ self._dyn_cls_data_members["_data"] = {}
1693
+
1694
+ def _get_generate_temp_table_params(self, persist=False, output_db=None, volatile=False):
1695
+ """
1696
+ DESCRIPTION:
1697
+ Function to get the required parameters to create either table or view.
1698
+ When function has output table arguments or argument persist is set to True,
1699
+ then function returns parameters to create table otherwise returns parameters
1700
+ to create view. If persist is set to True or volatile is set to True, in such cases,
1701
+ tables created are not garbage collected.
1702
+
1703
+ PARAMETERS:
1704
+ persist:
1705
+ Optional Argument.
1706
+ Specifies whether to persist the output table or not.
1707
+ When set to True, output tables created are not garbage collected
1708
+ at the end of the session, otherwise they are garbage collected.
1709
+ Default Value: False
1710
+ Types: bool
1711
+
1712
+ output_db:
1713
+ Optional Argument.
1714
+ Specifies the output DataBase name to create the output tables.
1715
+ Default Value: False
1716
+ Types: str
1717
+
1718
+ volatile:
1719
+ Optional Argument.
1720
+ Specifies whether table to create is a volatile table or not.
1721
+ Default Value: False
1722
+ Types: bool
1723
+
1724
+ RETURNS:
1725
+ dict
1726
+
1727
+ RAISES:
1728
+ None
1729
+
1730
+ EXAMPLES:
1731
+ self._get_generate_temp_table_params(True, True)
1732
+ """
1733
+ prefix = "td_uaf_out_"
1734
+ gc_on_quit = True
1735
+ # If result is to be persisted then, it must not be Garbage collected.
1736
+ if persist or volatile:
1737
+ gc_on_quit = False
1738
+ prefix = "td_uaf_{}_out_".format("persist" if persist else "volatile")
1739
+
1740
+ return {"table_type": self.db_object_type,
1741
+ "prefix": prefix,
1742
+ "gc_on_quit": gc_on_quit,
1743
+ "databasename": output_db if output_db else _get_context_temp_databasename(
1744
+ table_type=self.db_object_type)}
1745
+
1746
+ def _process_output_argument(self, **kwargs):
1747
+ """
1748
+ DESCRIPTION:
1749
+ Function to process output argument(s) of UAF function.
1750
+
1751
+ PARAMETERS:
1752
+ kwargs:
1753
+ Specifies the keyword arguments passed to a function.
1754
+
1755
+ RETURNS:
1756
+ None.
1757
+
1758
+ RAISES:
1759
+ TypeError, ValueError, TeradataMlException.
1760
+
1761
+ EXAMPLES:
1762
+ self._process_output_argument()
1763
+ """
1764
+ # If kwargs not provided, initialize it with default value.
1765
+ volatile = kwargs.get("volatile", False)
1766
+ persist = kwargs.get("persist", False)
1767
+ output_table_name = kwargs.get("output_table_name", None)
1768
+ output_db_name = kwargs.get("output_db_name", None)
1769
+
1770
+ arg_info = []
1771
+ arg_info.append(["volatile", volatile, False, (bool)])
1772
+ arg_info.append(["persist", persist, False, (bool)])
1773
+ arg_info.append(["output_table_name", output_table_name, True, (str), True])
1774
+ arg_info.append(["output_db_name", output_db_name, True, (str), True])
1775
+
1776
+ _Validators._validate_function_arguments(arg_info)
1777
+
1778
+ # If table is name is not provided by user, generate the temp table name.
1779
+ # Else, get fully qualified table name.
1780
+ if output_table_name is None:
1781
+ # Generate the name of the table, if not provide by user.
1782
+ func_params = self._get_generate_temp_table_params(persist=persist,
1783
+ output_db=output_db_name,
1784
+ volatile=volatile)
1785
+
1786
+ # Generate temp table name and add it to garbage collector.
1787
+ table_name = UtilFuncs._generate_temp_table_name(**func_params)
1788
+ else:
1789
+ # If database name is not provided by user, get the default database name
1790
+ # else use user provided database name.
1791
+ db_name = output_db_name if output_db_name is not None else \
1792
+ _get_context_temp_databasename(table_type=self.db_object_type)
1793
+
1794
+ # Get the fully qualified table name.
1795
+ table_name = "{}.{}".format(UtilFuncs._teradata_quote_arg(db_name,
1796
+ "\"", False),
1797
+ UtilFuncs._teradata_quote_arg(output_table_name,
1798
+ "\"", False))
1799
+
1800
+ # If persist is set to False, add the table name to
1801
+ # Garbage collector.
1802
+ if not persist:
1803
+ GarbageCollector._add_to_garbagecollector(table_name)
1804
+
1805
+ # Populate the output arg, output table map and volatile output.
1806
+ self._func_output_args = table_name
1807
+ self._function_output_table_map["result"] = table_name
1808
+ self._volatile_output = volatile
1809
+
1810
    def __process_individual_argument(self, argument, **kwargs):
        """
        DESCRIPTION:
            Internal function to process the individual arguments.
            1. If the argument does not have nested parameters and is present in kwargs,
               the function does the following:
                * Checks the required arguments are passed or not.
                * Checks the type of the arguments are expected or not.
                * Checks for permitted values.
                * Checks for empty string.
                * If validations run fine,
                  then returns a dict with the SQL name of the argument as key
                  and user provided value as the value.
                * Dictionary without nested parameters is formed as below:
                    {arg_sql_name : value}
            2. If the argument has nested params:
                * Function loops over the nested parameters and calls itself recursively
                  on the nested parameters and repeats the process.
                * Dictionary with nested arguments is formed as below:
                    { Parent_sql_name : { Child1_sql_name : value, Child2_sql_name : value}}

        PARAMETERS:
            argument:
                Required Argument.
                Specifies the argument object (_AnlyFuncArgument).
                Types: _AnlyFuncArgument

            kwargs:
                Specifies the keyword arguments passed to a function.

        RETURNS:
            dict.
            Maps the SQL name of the argument to its processed value (possibly
            nested); empty when the argument was not provided or equals its
            default value.

        RAISES:
            ValueError OR TypeError OR TeradataMlException.

        EXAMPLES:
            self.__process_individual_argument(argument, arg1="string", arg2="db", arg3=2)

        """
        sql_name = argument.get_name()
        lang_name = argument.get_lang_name()
        arg_value = kwargs.get(lang_name)
        # Set the "argument".
        self._spl_func_obj.set_arg_name(argument)
        # Let's get spl handler if function requires.
        special_case_handler = self._spl_func_obj._get_handle()

        if len(argument.get_nested_param_list()) == 0:
            self._validate_analytic_function_argument(lang_name, arg_value, argument)
            # If argument is not None and it is not equal to the default value,
            # add the sql_name and arg_value to the dict else return an empty dict.
            if arg_value is not None and arg_value != argument.get_default_value():

                # If get_match_length_of_arguments is True, check if the arg_value is
                # a list and of the required size.
                if argument.get_match_length_of_arguments():
                    required_length = argument.get_required_length()
                    if (isinstance(arg_value, list) and len(arg_value) != required_length) or\
                            (not isinstance(arg_value, list)):
                        raise TeradataMlException(Messages.get_message(
                            MessageCodes.INVALID_LIST_LENGTH).format(lang_name,
                                                                     required_length),
                            MessageCodes.INVALID_LIST_LENGTH)

                # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
                # Check lower bound and upper bound for numeric arguments.
                if isinstance(arg_value, (int, float)):
                    lower_bound_inclusive = argument.get_lower_bound_type() == "INCLUSIVE"
                    upper_bound_inclusive = argument.get_upper_bound_type() == "INCLUSIVE"
                    _Validators._validate_argument_range(arg_value,
                                                         lang_name,
                                                         lbound=argument.get_lower_bound(),
                                                         ubound=argument.get_upper_bound(),
                                                         lbound_inclusive=lower_bound_inclusive,
                                                         ubound_inclusive=upper_bound_inclusive)

                # If the argument is a bool type, convert it to integer since SQL does
                # not know boolean processing.
                if bool in argument.get_python_type() and isinstance(arg_value, bool):
                    arg_value = int(arg_value)

                # Handle special cases for "arg_values" based on handling method.
                arg_value = special_case_handler(arg_value) if special_case_handler is not None else arg_value
                return {sql_name : arg_value}
            return {}
        else:
            # Nested parameters: recursively process each child and nest the
            # resulting dict under the parent's SQL name.
            temp_dict = {}
            for nested_arg in argument.get_nested_param_list():
                temp_dict.update(self.__process_individual_argument(nested_arg, **kwargs))
            return_dict = {sql_name : temp_dict} if temp_dict else {}
            return return_dict
1902
+
1903
+ def _process_other_argument(self, **kwargs):
1904
+ """
1905
+ DESCRIPTION:
1906
+ Function to process the metadata arguments. It does the following:
1907
+ * Iterates over the metadata arguments, calls __process_individual_argument
1908
+ for each argument and populates the dict '_func_other_args'.
1909
+
1910
+ PARAMETERS:
1911
+ kwargs:
1912
+ Specifies the keyword arguments passed to a function.
1913
+
1914
+ RETURNS:
1915
+ None.
1916
+
1917
+ RAISES:
1918
+ ValueError OR TypeError OR TeradataMlException.
1919
+
1920
+ EXAMPLES:
1921
+ self._process_other_arguments(arg1="string", arg2="db", arg3=2)
1922
+ """
1923
+ for argument in self._metadata.arguments:
1924
+ self._func_other_args.update(self.__process_individual_argument(argument, **kwargs))
1925
+
1926
+ # Process the InputFmt arguments.
1927
+ for input_fmt_argument in self._metadata.input_fmt_arguments:
1928
+ self._func_input_fmt_arguments.update(
1929
+ self.__process_individual_argument(input_fmt_argument,
1930
+ **kwargs))
1931
+
1932
+ # Process the OutputFmt arguments.
1933
+ for output_fmt_argument in self._metadata.output_fmt_arguments:
1934
+ self._func_output_fmt_arguments.update(
1935
+ self.__process_individual_argument(output_fmt_argument,
1936
+ **kwargs))
1937
+
1938
+ @collect_queryband(attr="func_name")
1939
+ def _execute_query(self, persist=False, volatile=None):
1940
+ """
1941
+ DESCRIPTION:
1942
+ Function to execute query on Vantage.
1943
+
1944
+ PARAMETERS:
1945
+ persist:
1946
+ Optional Argument.
1947
+ Specifies whether to persist a table or not.
1948
+ Default Value: False
1949
+ Type: bool
1950
+
1951
+ RETURNS:
1952
+ None
1953
+
1954
+ RAISES:
1955
+ TeradataMlException
1956
+
1957
+ EXAMPLES:
1958
+ self._execute_query()
1959
+ """
1960
+ try:
1961
+ # Execute already generated query.
1962
+ UtilFuncs._execute_query(query=self.sqlmr_query)
1963
+
1964
+ if persist:
1965
+ # SQL is already executed. So, print the table names.
1966
+ for output_attribute, table_name in self._function_output_table_map.items():
1967
+ print("{} data stored in table '{}'".format(output_attribute, table_name))
1968
+
1969
+ except Exception as emsg:
1970
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(emsg)),
1971
+ MessageCodes.TDMLDF_EXEC_SQL_FAILED)
1972
+
1973
+
1974
+ class _BYOMFunctionExecutor(_SQLEFunctionExecutor):
1975
+ def __init__(self, func_name):
1976
+ """
1977
+ DESCRIPTION:
1978
+ Constructor for the class.
1979
+
1980
+ PARAMETERS:
1981
+ func_name:
1982
+ Required Argument.
1983
+ Specifies the name of the analytic function, which is exposed to the user.
1984
+ Types: str
1985
+
1986
+ RAISES:
1987
+ None
1988
+
1989
+ EXAMPLES:
1990
+ _BYOMFunctionExecutor("ONNXPredict")
1991
+ """
1992
+ super().__init__(func_name, TeradataAnalyticFunctionTypes.BYOM.value)
1993
+
1994
+ def _generate_query(self, volatile=False):
1995
+ """
1996
+ DESCRIPTION:
1997
+ Function to generate the SQL query for BYOM analytic function.
1998
+
1999
+ PARAMETERS:
2000
+ volatile:
2001
+ Optional Argument.
2002
+ Specifies whether to create a volatile table or not.
2003
+ Default Value: False
2004
+ Type: bool
2005
+
2006
+ RETURNS:
2007
+ None.
2008
+
2009
+ RAISES:
2010
+ None.
2011
+
2012
+ EXAMPLES:
2013
+ self._generate_query()
2014
+ """
2015
+ # Check for byom install location and
2016
+ # update the db_name.
2017
+ db_name = None
2018
+ if configure.byom_install_location is not None:
2019
+ db_name = configure.byom_install_location
2020
+
2021
+ self.__aqg_obj = AnalyticQueryGenerator(function_name=self._metadata.sql_function_name,
2022
+ func_input_arg_sql_names=self._func_input_arg_sql_names,
2023
+ func_input_table_view_query=self._func_input_table_view_query,
2024
+ func_input_dataframe_type=self._func_input_dataframe_type,
2025
+ func_input_distribution=self._func_input_distribution,
2026
+ func_input_partition_by_cols=self._func_input_partition_by_cols,
2027
+ func_input_order_by_cols=self._func_input_order_by_cols,
2028
+ func_other_arg_sql_names=self._func_other_arg_sql_names,
2029
+ func_other_args_values=self._func_other_args,
2030
+ func_other_arg_json_datatypes=self._func_other_arg_json_datatypes,
2031
+ func_output_args_sql_names=self._func_output_args_sql_names,
2032
+ func_output_args_values=self._func_output_args,
2033
+ engine="ENGINE_SQL",
2034
+ db_name=db_name,
2035
+ volatile_output=volatile,
2036
+ skip_config_lookup=True,
2037
+ func_input_local_order=self._func_input_local_order)
2038
+
2039
+ # Invoke call to SQL-MR generation.
2040
+ self.sqlmr_query = self.__aqg_obj._gen_sqlmr_select_stmt_sql()