teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic; see the package registry's advisory page for more details.

Files changed (1303)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1935 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2040 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +798 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1683 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +1011 -0
  31. teradataml/automl/data_transformation.py +789 -0
  32. teradataml/automl/feature_engineering.py +1580 -0
  33. teradataml/automl/feature_exploration.py +554 -0
  34. teradataml/automl/model_evaluation.py +151 -0
  35. teradataml/automl/model_training.py +1026 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/auth_client.py +133 -0
  41. teradataml/clients/pkce_client.py +481 -481
  42. teradataml/common/aed_utils.py +7 -2
  43. teradataml/common/bulk_exposed_utils.py +111 -111
  44. teradataml/common/constants.py +1438 -1441
  45. teradataml/common/deprecations.py +160 -0
  46. teradataml/common/exceptions.py +73 -73
  47. teradataml/common/formula.py +742 -742
  48. teradataml/common/garbagecollector.py +597 -635
  49. teradataml/common/messagecodes.py +424 -431
  50. teradataml/common/messages.py +228 -231
  51. teradataml/common/sqlbundle.py +693 -693
  52. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  53. teradataml/common/utils.py +2424 -2500
  54. teradataml/common/warnings.py +25 -25
  55. teradataml/common/wrapper_utils.py +1 -110
  56. teradataml/config/dummy_file1.cfg +4 -4
  57. teradataml/config/dummy_file2.cfg +2 -2
  58. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  59. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  60. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  61. teradataml/context/aed_context.py +217 -217
  62. teradataml/context/context.py +1091 -999
  63. teradataml/data/A_loan.csv +19 -19
  64. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  65. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  66. teradataml/data/B_loan.csv +49 -49
  67. teradataml/data/BuoyData2.csv +17 -17
  68. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  69. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  70. teradataml/data/Convolve2RealsLeft.csv +5 -5
  71. teradataml/data/Convolve2RealsRight.csv +5 -5
  72. teradataml/data/Convolve2ValidLeft.csv +11 -11
  73. teradataml/data/Convolve2ValidRight.csv +11 -11
  74. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  75. teradataml/data/Orders1_12mf.csv +24 -24
  76. teradataml/data/Pi_loan.csv +7 -7
  77. teradataml/data/SMOOTHED_DATA.csv +7 -7
  78. teradataml/data/TestDFFT8.csv +9 -9
  79. teradataml/data/TestRiver.csv +109 -109
  80. teradataml/data/Traindata.csv +28 -28
  81. teradataml/data/acf.csv +17 -17
  82. teradataml/data/adaboost_example.json +34 -34
  83. teradataml/data/adaboostpredict_example.json +24 -24
  84. teradataml/data/additional_table.csv +10 -10
  85. teradataml/data/admissions_test.csv +21 -21
  86. teradataml/data/admissions_train.csv +41 -41
  87. teradataml/data/admissions_train_nulls.csv +41 -41
  88. teradataml/data/advertising.csv +201 -0
  89. teradataml/data/ageandheight.csv +13 -13
  90. teradataml/data/ageandpressure.csv +31 -31
  91. teradataml/data/antiselect_example.json +36 -36
  92. teradataml/data/antiselect_input.csv +8 -8
  93. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  94. teradataml/data/applicant_external.csv +6 -6
  95. teradataml/data/applicant_reference.csv +6 -6
  96. teradataml/data/arima_example.json +9 -9
  97. teradataml/data/assortedtext_input.csv +8 -8
  98. teradataml/data/attribution_example.json +33 -33
  99. teradataml/data/attribution_sample_table.csv +27 -27
  100. teradataml/data/attribution_sample_table1.csv +6 -6
  101. teradataml/data/attribution_sample_table2.csv +11 -11
  102. teradataml/data/bank_churn.csv +10001 -0
  103. teradataml/data/bank_marketing.csv +11163 -0
  104. teradataml/data/bank_web_clicks1.csv +42 -42
  105. teradataml/data/bank_web_clicks2.csv +91 -91
  106. teradataml/data/bank_web_url.csv +85 -85
  107. teradataml/data/barrier.csv +2 -2
  108. teradataml/data/barrier_new.csv +3 -3
  109. teradataml/data/betweenness_example.json +13 -13
  110. teradataml/data/bike_sharing.csv +732 -0
  111. teradataml/data/bin_breaks.csv +8 -8
  112. teradataml/data/bin_fit_ip.csv +3 -3
  113. teradataml/data/binary_complex_left.csv +11 -11
  114. teradataml/data/binary_complex_right.csv +11 -11
  115. teradataml/data/binary_matrix_complex_left.csv +21 -21
  116. teradataml/data/binary_matrix_complex_right.csv +21 -21
  117. teradataml/data/binary_matrix_real_left.csv +21 -21
  118. teradataml/data/binary_matrix_real_right.csv +21 -21
  119. teradataml/data/blood2ageandweight.csv +26 -26
  120. teradataml/data/bmi.csv +501 -0
  121. teradataml/data/boston.csv +507 -507
  122. teradataml/data/boston2cols.csv +721 -0
  123. teradataml/data/breast_cancer.csv +570 -0
  124. teradataml/data/buoydata_mix.csv +11 -11
  125. teradataml/data/burst_data.csv +5 -5
  126. teradataml/data/burst_example.json +20 -20
  127. teradataml/data/byom_example.json +17 -17
  128. teradataml/data/bytes_table.csv +3 -3
  129. teradataml/data/cal_housing_ex_raw.csv +70 -70
  130. teradataml/data/callers.csv +7 -7
  131. teradataml/data/calls.csv +10 -10
  132. teradataml/data/cars_hist.csv +33 -33
  133. teradataml/data/cat_table.csv +24 -24
  134. teradataml/data/ccm_example.json +31 -31
  135. teradataml/data/ccm_input.csv +91 -91
  136. teradataml/data/ccm_input2.csv +13 -13
  137. teradataml/data/ccmexample.csv +101 -101
  138. teradataml/data/ccmprepare_example.json +8 -8
  139. teradataml/data/ccmprepare_input.csv +91 -91
  140. teradataml/data/cfilter_example.json +12 -12
  141. teradataml/data/changepointdetection_example.json +18 -18
  142. teradataml/data/changepointdetectionrt_example.json +8 -8
  143. teradataml/data/chi_sq.csv +2 -2
  144. teradataml/data/churn_data.csv +14 -14
  145. teradataml/data/churn_emission.csv +35 -35
  146. teradataml/data/churn_initial.csv +3 -3
  147. teradataml/data/churn_state_transition.csv +5 -5
  148. teradataml/data/citedges_2.csv +745 -745
  149. teradataml/data/citvertices_2.csv +1210 -1210
  150. teradataml/data/clicks2.csv +16 -16
  151. teradataml/data/clickstream.csv +12 -12
  152. teradataml/data/clickstream1.csv +11 -11
  153. teradataml/data/closeness_example.json +15 -15
  154. teradataml/data/complaints.csv +21 -21
  155. teradataml/data/complaints_mini.csv +3 -3
  156. teradataml/data/complaints_testtoken.csv +224 -224
  157. teradataml/data/complaints_tokens_test.csv +353 -353
  158. teradataml/data/complaints_traintoken.csv +472 -472
  159. teradataml/data/computers_category.csv +1001 -1001
  160. teradataml/data/computers_test1.csv +1252 -1252
  161. teradataml/data/computers_train1.csv +5009 -5009
  162. teradataml/data/computers_train1_clustered.csv +5009 -5009
  163. teradataml/data/confusionmatrix_example.json +9 -9
  164. teradataml/data/conversion_event_table.csv +3 -3
  165. teradataml/data/corr_input.csv +17 -17
  166. teradataml/data/correlation_example.json +11 -11
  167. teradataml/data/coxhazardratio_example.json +39 -39
  168. teradataml/data/coxph_example.json +15 -15
  169. teradataml/data/coxsurvival_example.json +28 -28
  170. teradataml/data/cpt.csv +41 -41
  171. teradataml/data/credit_ex_merged.csv +45 -45
  172. teradataml/data/customer_loyalty.csv +301 -301
  173. teradataml/data/customer_loyalty_newseq.csv +31 -31
  174. teradataml/data/customer_segmentation_test.csv +2628 -0
  175. teradataml/data/customer_segmentation_train.csv +8069 -0
  176. teradataml/data/dataframe_example.json +146 -146
  177. teradataml/data/decisionforest_example.json +37 -37
  178. teradataml/data/decisionforestpredict_example.json +38 -38
  179. teradataml/data/decisiontree_example.json +21 -21
  180. teradataml/data/decisiontreepredict_example.json +45 -45
  181. teradataml/data/dfft2_size4_real.csv +17 -17
  182. teradataml/data/dfft2_test_matrix16.csv +17 -17
  183. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  184. teradataml/data/diabetes.csv +443 -443
  185. teradataml/data/diabetes_test.csv +89 -89
  186. teradataml/data/dict_table.csv +5 -5
  187. teradataml/data/docperterm_table.csv +4 -4
  188. teradataml/data/docs/__init__.py +1 -1
  189. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  190. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  191. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  192. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  193. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  194. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  195. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  196. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  197. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  198. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  199. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  200. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  201. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  202. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  203. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  204. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  205. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  206. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  207. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  208. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  209. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  210. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  211. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  212. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  213. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  214. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  215. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  216. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  217. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
  218. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
  219. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  220. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
  221. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  222. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  223. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  224. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  225. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  226. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  227. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  228. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  229. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  230. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  231. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  232. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  233. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  234. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  235. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  236. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  237. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  238. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  239. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  240. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  241. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  242. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
  243. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  244. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  245. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  246. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  247. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  248. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  249. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  250. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  251. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
  252. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  253. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  254. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  255. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  256. teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
  257. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  258. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  259. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
  260. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  261. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  262. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  263. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
  264. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  265. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  266. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  267. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
  268. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  269. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  270. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  271. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  272. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  273. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  274. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  275. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  276. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
  277. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  278. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  279. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  280. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  281. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
  282. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
  283. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  284. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  285. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  286. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  287. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  288. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  289. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  290. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  291. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
  292. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  293. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  294. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  295. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  296. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  297. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  298. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  299. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  300. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
  301. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  302. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
  303. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
  304. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  305. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  306. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  307. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  308. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  309. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  310. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  311. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  312. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
  313. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  314. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  315. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  316. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  317. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  318. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  319. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  320. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  321. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  322. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  323. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  324. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
  325. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
  326. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
  327. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  328. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  329. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  330. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  331. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  332. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  333. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  334. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  335. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  336. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  337. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  338. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  339. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  340. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  341. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  342. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  343. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  344. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  345. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  346. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  347. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  348. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  349. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  350. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  351. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  352. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  353. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  354. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  355. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  356. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  357. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  358. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  359. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  360. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  361. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  362. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  363. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  364. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  365. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  366. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  367. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  368. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  369. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  370. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  371. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  372. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  373. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  374. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  375. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  376. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  377. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  378. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  379. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  380. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  381. teradataml/data/dtw_example.json +17 -17
  382. teradataml/data/dtw_t1.csv +11 -11
  383. teradataml/data/dtw_t2.csv +4 -4
  384. teradataml/data/dwt2d_example.json +15 -15
  385. teradataml/data/dwt_example.json +14 -14
  386. teradataml/data/dwt_filter_dim.csv +5 -5
  387. teradataml/data/emission.csv +9 -9
  388. teradataml/data/emp_table_by_dept.csv +19 -19
  389. teradataml/data/employee_info.csv +4 -4
  390. teradataml/data/employee_table.csv +6 -6
  391. teradataml/data/excluding_event_table.csv +2 -2
  392. teradataml/data/finance_data.csv +6 -6
  393. teradataml/data/finance_data2.csv +61 -61
  394. teradataml/data/finance_data3.csv +93 -93
  395. teradataml/data/fish.csv +160 -0
  396. teradataml/data/fm_blood2ageandweight.csv +26 -26
  397. teradataml/data/fmeasure_example.json +11 -11
  398. teradataml/data/followers_leaders.csv +10 -10
  399. teradataml/data/fpgrowth_example.json +12 -12
  400. teradataml/data/frequentpaths_example.json +29 -29
  401. teradataml/data/friends.csv +9 -9
  402. teradataml/data/fs_input.csv +33 -33
  403. teradataml/data/fs_input1.csv +33 -33
  404. teradataml/data/genData.csv +513 -513
  405. teradataml/data/geodataframe_example.json +39 -39
  406. teradataml/data/glass_types.csv +215 -0
  407. teradataml/data/glm_admissions_model.csv +12 -12
  408. teradataml/data/glm_example.json +56 -29
  409. teradataml/data/glml1l2_example.json +28 -28
  410. teradataml/data/glml1l2predict_example.json +54 -54
  411. teradataml/data/glmpredict_example.json +54 -54
  412. teradataml/data/gq_t1.csv +21 -21
  413. teradataml/data/hconvolve_complex_right.csv +5 -5
  414. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  415. teradataml/data/histogram_example.json +11 -11
  416. teradataml/data/hmmdecoder_example.json +78 -78
  417. teradataml/data/hmmevaluator_example.json +24 -24
  418. teradataml/data/hmmsupervised_example.json +10 -10
  419. teradataml/data/hmmunsupervised_example.json +7 -7
  420. teradataml/data/house_values.csv +12 -12
  421. teradataml/data/house_values2.csv +13 -13
  422. teradataml/data/housing_cat.csv +7 -7
  423. teradataml/data/housing_data.csv +9 -9
  424. teradataml/data/housing_test.csv +47 -47
  425. teradataml/data/housing_test_binary.csv +47 -47
  426. teradataml/data/housing_train.csv +493 -493
  427. teradataml/data/housing_train_attribute.csv +4 -4
  428. teradataml/data/housing_train_binary.csv +437 -437
  429. teradataml/data/housing_train_parameter.csv +2 -2
  430. teradataml/data/housing_train_response.csv +493 -493
  431. teradataml/data/housing_train_segment.csv +201 -0
  432. teradataml/data/ibm_stock.csv +370 -370
  433. teradataml/data/ibm_stock1.csv +370 -370
  434. teradataml/data/identitymatch_example.json +21 -21
  435. teradataml/data/idf_table.csv +4 -4
  436. teradataml/data/impressions.csv +101 -101
  437. teradataml/data/inflation.csv +21 -21
  438. teradataml/data/initial.csv +3 -3
  439. teradataml/data/insect2Cols.csv +61 -0
  440. teradataml/data/insect_sprays.csv +12 -12
  441. teradataml/data/insurance.csv +1339 -1339
  442. teradataml/data/interpolator_example.json +12 -12
  443. teradataml/data/iris_altinput.csv +481 -481
  444. teradataml/data/iris_attribute_output.csv +8 -8
  445. teradataml/data/iris_attribute_test.csv +121 -121
  446. teradataml/data/iris_attribute_train.csv +481 -481
  447. teradataml/data/iris_category_expect_predict.csv +31 -31
  448. teradataml/data/iris_data.csv +151 -0
  449. teradataml/data/iris_input.csv +151 -151
  450. teradataml/data/iris_response_train.csv +121 -121
  451. teradataml/data/iris_test.csv +31 -31
  452. teradataml/data/iris_train.csv +121 -121
  453. teradataml/data/join_table1.csv +4 -4
  454. teradataml/data/join_table2.csv +4 -4
  455. teradataml/data/jsons/anly_function_name.json +6 -6
  456. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  457. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  458. teradataml/data/jsons/byom/h2opredict.json +194 -194
  459. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  460. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  461. teradataml/data/jsons/paired_functions.json +435 -435
  462. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  463. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  464. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  465. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  466. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  467. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  468. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  469. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  470. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  471. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  472. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  473. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  474. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  475. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  476. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  477. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  478. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  479. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  480. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  481. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  482. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  483. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  484. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  485. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  486. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  487. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  488. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  489. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  490. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  491. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  492. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  493. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  494. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  495. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  496. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  497. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  498. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  499. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  500. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  501. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  502. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  503. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  504. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  505. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  506. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  507. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  508. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  509. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  510. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  511. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  512. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  513. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  514. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  515. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  516. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  517. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  518. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  519. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  520. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  521. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  522. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  523. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  524. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  525. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  526. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  527. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  528. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  529. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  531. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  532. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  533. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  534. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  535. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  536. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  537. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  539. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  540. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  541. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  542. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  543. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  544. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  545. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  546. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  547. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  548. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  549. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  550. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  551. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  552. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  553. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  554. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  555. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  556. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  557. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  558. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  559. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  560. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  561. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  562. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  563. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  564. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  565. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  566. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  567. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  568. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
  569. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  570. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  571. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  572. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  573. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  574. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  575. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  576. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  577. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  578. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  579. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
  580. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  581. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  582. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  583. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
  584. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
  585. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  586. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  587. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
  588. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  589. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  590. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  591. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
  592. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  593. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  594. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  595. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
  596. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  597. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  598. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  599. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  600. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  601. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  602. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  603. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  604. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  605. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  606. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  607. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  608. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  609. teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
  610. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  611. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  612. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  613. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  614. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  615. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  616. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  617. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  618. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
  619. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
  620. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
  621. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  622. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  623. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  624. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  625. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  626. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  627. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  628. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  629. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  630. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  631. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  632. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  633. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  634. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  635. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
  636. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
  637. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
  638. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  639. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  640. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  641. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  642. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  643. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  644. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  645. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  646. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  647. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  648. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  649. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  650. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  651. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  653. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  654. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  655. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  656. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  657. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  658. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  659. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  660. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  661. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  662. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  663. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  664. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  665. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  666. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  667. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  668. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  669. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  670. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  671. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  672. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  673. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  674. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  675. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  676. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  677. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  678. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  679. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  680. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  681. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  682. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  683. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  684. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  685. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  686. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  687. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  688. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  689. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  690. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  691. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  692. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  693. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  694. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  695. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  696. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  697. teradataml/data/kmeans_example.json +22 -17
  698. teradataml/data/kmeans_table.csv +10 -0
  699. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  700. teradataml/data/knn_example.json +18 -18
  701. teradataml/data/knnrecommender_example.json +6 -6
  702. teradataml/data/knnrecommenderpredict_example.json +12 -12
  703. teradataml/data/lar_example.json +17 -17
  704. teradataml/data/larpredict_example.json +30 -30
  705. teradataml/data/lc_new_predictors.csv +5 -5
  706. teradataml/data/lc_new_reference.csv +9 -9
  707. teradataml/data/lda_example.json +8 -8
  708. teradataml/data/ldainference_example.json +14 -14
  709. teradataml/data/ldatopicsummary_example.json +8 -8
  710. teradataml/data/levendist_input.csv +13 -13
  711. teradataml/data/levenshteindistance_example.json +10 -10
  712. teradataml/data/linreg_example.json +9 -9
  713. teradataml/data/load_example_data.py +326 -323
  714. teradataml/data/loan_prediction.csv +295 -295
  715. teradataml/data/lungcancer.csv +138 -138
  716. teradataml/data/mappingdata.csv +12 -12
  717. teradataml/data/milk_timeseries.csv +157 -157
  718. teradataml/data/min_max_titanic.csv +4 -4
  719. teradataml/data/minhash_example.json +6 -6
  720. teradataml/data/ml_ratings.csv +7547 -7547
  721. teradataml/data/ml_ratings_10.csv +2445 -2445
  722. teradataml/data/model1_table.csv +5 -5
  723. teradataml/data/model2_table.csv +5 -5
  724. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  725. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  726. teradataml/data/modularity_example.json +12 -12
  727. teradataml/data/movavg_example.json +7 -7
  728. teradataml/data/mtx1.csv +7 -7
  729. teradataml/data/mtx2.csv +13 -13
  730. teradataml/data/multi_model_classification.csv +401 -0
  731. teradataml/data/multi_model_regression.csv +401 -0
  732. teradataml/data/mvdfft8.csv +9 -9
  733. teradataml/data/naivebayes_example.json +9 -9
  734. teradataml/data/naivebayespredict_example.json +19 -19
  735. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  736. teradataml/data/naivebayestextclassifier_example.json +8 -8
  737. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  738. teradataml/data/name_Find_configure.csv +10 -10
  739. teradataml/data/namedentityfinder_example.json +14 -14
  740. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  741. teradataml/data/namedentityfindertrainer_example.json +6 -6
  742. teradataml/data/nb_iris_input_test.csv +31 -31
  743. teradataml/data/nb_iris_input_train.csv +121 -121
  744. teradataml/data/nbp_iris_model.csv +13 -13
  745. teradataml/data/ner_extractor_text.csv +2 -2
  746. teradataml/data/ner_sports_test2.csv +29 -29
  747. teradataml/data/ner_sports_train.csv +501 -501
  748. teradataml/data/nerevaluator_example.json +5 -5
  749. teradataml/data/nerextractor_example.json +18 -18
  750. teradataml/data/nermem_sports_test.csv +17 -17
  751. teradataml/data/nermem_sports_train.csv +50 -50
  752. teradataml/data/nertrainer_example.json +6 -6
  753. teradataml/data/ngrams_example.json +6 -6
  754. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  755. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  756. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  757. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  758. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  759. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  760. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  761. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  762. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  763. teradataml/data/npath_example.json +23 -23
  764. teradataml/data/ntree_example.json +14 -14
  765. teradataml/data/numeric_strings.csv +4 -4
  766. teradataml/data/numerics.csv +4 -4
  767. teradataml/data/ocean_buoy.csv +17 -17
  768. teradataml/data/ocean_buoy2.csv +17 -17
  769. teradataml/data/ocean_buoys.csv +27 -27
  770. teradataml/data/ocean_buoys2.csv +10 -10
  771. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  772. teradataml/data/ocean_buoys_seq.csv +29 -29
  773. teradataml/data/onehot_encoder_train.csv +4 -0
  774. teradataml/data/openml_example.json +92 -0
  775. teradataml/data/optional_event_table.csv +4 -4
  776. teradataml/data/orders1.csv +11 -11
  777. teradataml/data/orders1_12.csv +12 -12
  778. teradataml/data/orders_ex.csv +4 -4
  779. teradataml/data/pack_example.json +8 -8
  780. teradataml/data/package_tracking.csv +19 -19
  781. teradataml/data/package_tracking_pti.csv +18 -18
  782. teradataml/data/pagerank_example.json +13 -13
  783. teradataml/data/paragraphs_input.csv +6 -6
  784. teradataml/data/pathanalyzer_example.json +7 -7
  785. teradataml/data/pathgenerator_example.json +7 -7
  786. teradataml/data/phrases.csv +7 -7
  787. teradataml/data/pivot_example.json +8 -8
  788. teradataml/data/pivot_input.csv +22 -22
  789. teradataml/data/playerRating.csv +31 -31
  790. teradataml/data/postagger_example.json +6 -6
  791. teradataml/data/posttagger_output.csv +44 -44
  792. teradataml/data/production_data.csv +16 -16
  793. teradataml/data/production_data2.csv +7 -7
  794. teradataml/data/randomsample_example.json +31 -31
  795. teradataml/data/randomwalksample_example.json +8 -8
  796. teradataml/data/rank_table.csv +6 -6
  797. teradataml/data/ref_mobile_data.csv +4 -4
  798. teradataml/data/ref_mobile_data_dense.csv +2 -2
  799. teradataml/data/ref_url.csv +17 -17
  800. teradataml/data/restaurant_reviews.csv +7 -7
  801. teradataml/data/river_data.csv +145 -145
  802. teradataml/data/roc_example.json +7 -7
  803. teradataml/data/roc_input.csv +101 -101
  804. teradataml/data/rule_inputs.csv +6 -6
  805. teradataml/data/rule_table.csv +2 -2
  806. teradataml/data/sales.csv +7 -7
  807. teradataml/data/sales_transaction.csv +501 -501
  808. teradataml/data/salesdata.csv +342 -342
  809. teradataml/data/sample_cities.csv +2 -2
  810. teradataml/data/sample_shapes.csv +10 -10
  811. teradataml/data/sample_streets.csv +2 -2
  812. teradataml/data/sampling_example.json +15 -15
  813. teradataml/data/sax_example.json +8 -8
  814. teradataml/data/scale_attributes.csv +3 -0
  815. teradataml/data/scale_example.json +74 -23
  816. teradataml/data/scale_housing.csv +11 -11
  817. teradataml/data/scale_housing_test.csv +6 -6
  818. teradataml/data/scale_input_part_sparse.csv +31 -0
  819. teradataml/data/scale_input_partitioned.csv +16 -0
  820. teradataml/data/scale_input_sparse.csv +11 -0
  821. teradataml/data/scale_parameters.csv +3 -0
  822. teradataml/data/scale_stat.csv +11 -11
  823. teradataml/data/scalebypartition_example.json +13 -13
  824. teradataml/data/scalemap_example.json +13 -13
  825. teradataml/data/scalesummary_example.json +12 -12
  826. teradataml/data/score_category.csv +101 -101
  827. teradataml/data/score_summary.csv +4 -4
  828. teradataml/data/script_example.json +9 -9
  829. teradataml/data/scripts/deploy_script.py +84 -0
  830. teradataml/data/scripts/mapper.R +20 -0
  831. teradataml/data/scripts/mapper.py +15 -15
  832. teradataml/data/scripts/mapper_replace.py +15 -15
  833. teradataml/data/scripts/sklearn/__init__.py +0 -0
  834. teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
  835. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
  836. teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
  837. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
  838. teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
  839. teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
  840. teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
  841. teradataml/data/seeds.csv +10 -10
  842. teradataml/data/sentenceextractor_example.json +6 -6
  843. teradataml/data/sentiment_extract_input.csv +11 -11
  844. teradataml/data/sentiment_train.csv +16 -16
  845. teradataml/data/sentiment_word.csv +20 -20
  846. teradataml/data/sentiment_word_input.csv +19 -19
  847. teradataml/data/sentimentextractor_example.json +24 -24
  848. teradataml/data/sentimenttrainer_example.json +8 -8
  849. teradataml/data/sequence_table.csv +10 -10
  850. teradataml/data/seriessplitter_example.json +7 -7
  851. teradataml/data/sessionize_example.json +17 -17
  852. teradataml/data/sessionize_table.csv +116 -116
  853. teradataml/data/setop_test1.csv +24 -24
  854. teradataml/data/setop_test2.csv +22 -22
  855. teradataml/data/soc_nw_edges.csv +10 -10
  856. teradataml/data/soc_nw_vertices.csv +7 -7
  857. teradataml/data/souvenir_timeseries.csv +167 -167
  858. teradataml/data/sparse_iris_attribute.csv +5 -5
  859. teradataml/data/sparse_iris_test.csv +121 -121
  860. teradataml/data/sparse_iris_train.csv +601 -601
  861. teradataml/data/star1.csv +6 -6
  862. teradataml/data/state_transition.csv +5 -5
  863. teradataml/data/stock_data.csv +53 -53
  864. teradataml/data/stock_movement.csv +11 -11
  865. teradataml/data/stock_vol.csv +76 -76
  866. teradataml/data/stop_words.csv +8 -8
  867. teradataml/data/store_sales.csv +37 -37
  868. teradataml/data/stringsimilarity_example.json +7 -7
  869. teradataml/data/strsimilarity_input.csv +13 -13
  870. teradataml/data/students.csv +101 -101
  871. teradataml/data/svm_iris_input_test.csv +121 -121
  872. teradataml/data/svm_iris_input_train.csv +481 -481
  873. teradataml/data/svm_iris_model.csv +7 -7
  874. teradataml/data/svmdense_example.json +9 -9
  875. teradataml/data/svmdensepredict_example.json +18 -18
  876. teradataml/data/svmsparse_example.json +7 -7
  877. teradataml/data/svmsparsepredict_example.json +13 -13
  878. teradataml/data/svmsparsesummary_example.json +7 -7
  879. teradataml/data/target_mobile_data.csv +13 -13
  880. teradataml/data/target_mobile_data_dense.csv +5 -5
  881. teradataml/data/templatedata.csv +1201 -1201
  882. teradataml/data/templates/open_source_ml.json +9 -0
  883. teradataml/data/teradataml_example.json +150 -1
  884. teradataml/data/test_classification.csv +101 -0
  885. teradataml/data/test_loan_prediction.csv +53 -53
  886. teradataml/data/test_pacf_12.csv +37 -37
  887. teradataml/data/test_prediction.csv +101 -0
  888. teradataml/data/test_regression.csv +101 -0
  889. teradataml/data/test_river2.csv +109 -109
  890. teradataml/data/text_inputs.csv +6 -6
  891. teradataml/data/textchunker_example.json +7 -7
  892. teradataml/data/textclassifier_example.json +6 -6
  893. teradataml/data/textclassifier_input.csv +7 -7
  894. teradataml/data/textclassifiertrainer_example.json +6 -6
  895. teradataml/data/textmorph_example.json +5 -5
  896. teradataml/data/textparser_example.json +15 -15
  897. teradataml/data/texttagger_example.json +11 -11
  898. teradataml/data/texttokenizer_example.json +6 -6
  899. teradataml/data/texttrainer_input.csv +11 -11
  900. teradataml/data/tf_example.json +6 -6
  901. teradataml/data/tfidf_example.json +13 -13
  902. teradataml/data/tfidf_input1.csv +201 -201
  903. teradataml/data/tfidf_train.csv +6 -6
  904. teradataml/data/time_table1.csv +535 -535
  905. teradataml/data/time_table2.csv +14 -14
  906. teradataml/data/timeseriesdata.csv +1601 -1601
  907. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  908. teradataml/data/titanic.csv +892 -892
  909. teradataml/data/token_table.csv +696 -696
  910. teradataml/data/train_multiclass.csv +101 -0
  911. teradataml/data/train_regression.csv +101 -0
  912. teradataml/data/train_regression_multiple_labels.csv +101 -0
  913. teradataml/data/train_tracking.csv +27 -27
  914. teradataml/data/transformation_table.csv +5 -5
  915. teradataml/data/transformation_table_new.csv +1 -1
  916. teradataml/data/tv_spots.csv +16 -16
  917. teradataml/data/twod_climate_data.csv +117 -117
  918. teradataml/data/uaf_example.json +475 -475
  919. teradataml/data/univariatestatistics_example.json +8 -8
  920. teradataml/data/unpack_example.json +9 -9
  921. teradataml/data/unpivot_example.json +9 -9
  922. teradataml/data/unpivot_input.csv +8 -8
  923. teradataml/data/us_air_pass.csv +36 -36
  924. teradataml/data/us_population.csv +624 -624
  925. teradataml/data/us_states_shapes.csv +52 -52
  926. teradataml/data/varmax_example.json +17 -17
  927. teradataml/data/vectordistance_example.json +25 -25
  928. teradataml/data/ville_climatedata.csv +121 -121
  929. teradataml/data/ville_tempdata.csv +12 -12
  930. teradataml/data/ville_tempdata1.csv +12 -12
  931. teradataml/data/ville_temperature.csv +11 -11
  932. teradataml/data/waveletTable.csv +1605 -1605
  933. teradataml/data/waveletTable2.csv +1605 -1605
  934. teradataml/data/weightedmovavg_example.json +8 -8
  935. teradataml/data/wft_testing.csv +5 -5
  936. teradataml/data/wine_data.csv +1600 -0
  937. teradataml/data/word_embed_input_table1.csv +5 -5
  938. teradataml/data/word_embed_input_table2.csv +4 -4
  939. teradataml/data/word_embed_model.csv +22 -22
  940. teradataml/data/words_input.csv +13 -13
  941. teradataml/data/xconvolve_complex_left.csv +6 -6
  942. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  943. teradataml/data/xgboost_example.json +35 -35
  944. teradataml/data/xgboostpredict_example.json +31 -31
  945. teradataml/data/ztest_example.json +16 -0
  946. teradataml/dataframe/copy_to.py +1769 -1698
  947. teradataml/dataframe/data_transfer.py +2812 -2745
  948. teradataml/dataframe/dataframe.py +17630 -16946
  949. teradataml/dataframe/dataframe_utils.py +1875 -1740
  950. teradataml/dataframe/fastload.py +794 -603
  951. teradataml/dataframe/indexer.py +424 -424
  952. teradataml/dataframe/setop.py +1179 -1166
  953. teradataml/dataframe/sql.py +10174 -6432
  954. teradataml/dataframe/sql_function_parameters.py +439 -388
  955. teradataml/dataframe/sql_functions.py +652 -652
  956. teradataml/dataframe/sql_interfaces.py +220 -220
  957. teradataml/dataframe/vantage_function_types.py +674 -630
  958. teradataml/dataframe/window.py +693 -692
  959. teradataml/dbutils/__init__.py +3 -3
  960. teradataml/dbutils/dbutils.py +1167 -1150
  961. teradataml/dbutils/filemgr.py +267 -267
  962. teradataml/gen_ai/__init__.py +2 -2
  963. teradataml/gen_ai/convAI.py +472 -472
  964. teradataml/geospatial/__init__.py +3 -3
  965. teradataml/geospatial/geodataframe.py +1105 -1094
  966. teradataml/geospatial/geodataframecolumn.py +392 -387
  967. teradataml/geospatial/geometry_types.py +925 -925
  968. teradataml/hyperparameter_tuner/__init__.py +1 -1
  969. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  970. teradataml/hyperparameter_tuner/utils.py +281 -187
  971. teradataml/lib/aed_0_1.dll +0 -0
  972. teradataml/lib/libaed_0_1.dylib +0 -0
  973. teradataml/lib/libaed_0_1.so +0 -0
  974. teradataml/libaed_0_1.dylib +0 -0
  975. teradataml/libaed_0_1.so +0 -0
  976. teradataml/opensource/__init__.py +1 -0
  977. teradataml/opensource/sklearn/__init__.py +1 -0
  978. teradataml/opensource/sklearn/_class.py +255 -0
  979. teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
  980. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  981. teradataml/opensource/sklearn/constants.py +54 -0
  982. teradataml/options/__init__.py +130 -124
  983. teradataml/options/configure.py +358 -336
  984. teradataml/options/display.py +176 -176
  985. teradataml/plot/__init__.py +2 -2
  986. teradataml/plot/axis.py +1388 -1388
  987. teradataml/plot/constants.py +15 -15
  988. teradataml/plot/figure.py +398 -398
  989. teradataml/plot/plot.py +760 -760
  990. teradataml/plot/query_generator.py +83 -83
  991. teradataml/plot/subplot.py +216 -216
  992. teradataml/scriptmgmt/UserEnv.py +3791 -3761
  993. teradataml/scriptmgmt/__init__.py +3 -3
  994. teradataml/scriptmgmt/lls_utils.py +1719 -1604
  995. teradataml/series/series.py +532 -532
  996. teradataml/series/series_utils.py +71 -71
  997. teradataml/table_operators/Apply.py +949 -917
  998. teradataml/table_operators/Script.py +1718 -1982
  999. teradataml/table_operators/TableOperator.py +1255 -1616
  1000. teradataml/table_operators/__init__.py +2 -3
  1001. teradataml/table_operators/apply_query_generator.py +262 -262
  1002. teradataml/table_operators/query_generator.py +507 -507
  1003. teradataml/table_operators/table_operator_query_generator.py +460 -460
  1004. teradataml/table_operators/table_operator_util.py +631 -639
  1005. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  1006. teradataml/table_operators/templates/dataframe_map.template +176 -176
  1007. teradataml/table_operators/templates/script_executor.template +170 -170
  1008. teradataml/utils/dtypes.py +684 -684
  1009. teradataml/utils/internal_buffer.py +84 -84
  1010. teradataml/utils/print_versions.py +205 -205
  1011. teradataml/utils/utils.py +410 -410
  1012. teradataml/utils/validators.py +2277 -2115
  1013. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
  1014. teradataml-20.0.0.1.dist-info/RECORD +1056 -0
  1015. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
  1016. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
  1017. teradataml/analytics/mle/AdaBoost.py +0 -651
  1018. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1019. teradataml/analytics/mle/Antiselect.py +0 -342
  1020. teradataml/analytics/mle/Arima.py +0 -641
  1021. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1022. teradataml/analytics/mle/Attribution.py +0 -1070
  1023. teradataml/analytics/mle/Betweenness.py +0 -658
  1024. teradataml/analytics/mle/Burst.py +0 -711
  1025. teradataml/analytics/mle/CCM.py +0 -600
  1026. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1027. teradataml/analytics/mle/CFilter.py +0 -460
  1028. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1029. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1030. teradataml/analytics/mle/Closeness.py +0 -737
  1031. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1032. teradataml/analytics/mle/Correlation.py +0 -477
  1033. teradataml/analytics/mle/Correlation2.py +0 -573
  1034. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1035. teradataml/analytics/mle/CoxPH.py +0 -556
  1036. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1037. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1038. teradataml/analytics/mle/DTW.py +0 -623
  1039. teradataml/analytics/mle/DWT.py +0 -564
  1040. teradataml/analytics/mle/DWT2D.py +0 -599
  1041. teradataml/analytics/mle/DecisionForest.py +0 -716
  1042. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1043. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1044. teradataml/analytics/mle/DecisionTree.py +0 -830
  1045. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1046. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1047. teradataml/analytics/mle/FMeasure.py +0 -402
  1048. teradataml/analytics/mle/FPGrowth.py +0 -734
  1049. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1050. teradataml/analytics/mle/GLM.py +0 -558
  1051. teradataml/analytics/mle/GLML1L2.py +0 -547
  1052. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1053. teradataml/analytics/mle/GLMPredict.py +0 -529
  1054. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1055. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1056. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1057. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1058. teradataml/analytics/mle/Histogram.py +0 -561
  1059. teradataml/analytics/mle/IDWT.py +0 -476
  1060. teradataml/analytics/mle/IDWT2D.py +0 -493
  1061. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1062. teradataml/analytics/mle/Interpolator.py +0 -918
  1063. teradataml/analytics/mle/KMeans.py +0 -485
  1064. teradataml/analytics/mle/KNN.py +0 -627
  1065. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1066. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1067. teradataml/analytics/mle/LAR.py +0 -439
  1068. teradataml/analytics/mle/LARPredict.py +0 -478
  1069. teradataml/analytics/mle/LDA.py +0 -548
  1070. teradataml/analytics/mle/LDAInference.py +0 -492
  1071. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1072. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1073. teradataml/analytics/mle/LinReg.py +0 -433
  1074. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1075. teradataml/analytics/mle/MinHash.py +0 -544
  1076. teradataml/analytics/mle/Modularity.py +0 -587
  1077. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1078. teradataml/analytics/mle/NERExtractor.py +0 -595
  1079. teradataml/analytics/mle/NERTrainer.py +0 -458
  1080. teradataml/analytics/mle/NGrams.py +0 -570
  1081. teradataml/analytics/mle/NPath.py +0 -634
  1082. teradataml/analytics/mle/NTree.py +0 -549
  1083. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1084. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1085. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1086. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1087. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1088. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1089. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1090. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1091. teradataml/analytics/mle/POSTagger.py +0 -417
  1092. teradataml/analytics/mle/Pack.py +0 -411
  1093. teradataml/analytics/mle/PageRank.py +0 -535
  1094. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1095. teradataml/analytics/mle/PathGenerator.py +0 -367
  1096. teradataml/analytics/mle/PathStart.py +0 -464
  1097. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1098. teradataml/analytics/mle/Pivot.py +0 -471
  1099. teradataml/analytics/mle/ROC.py +0 -425
  1100. teradataml/analytics/mle/RandomSample.py +0 -637
  1101. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1102. teradataml/analytics/mle/SAX.py +0 -779
  1103. teradataml/analytics/mle/SVMDense.py +0 -677
  1104. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1105. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1106. teradataml/analytics/mle/SVMSparse.py +0 -557
  1107. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1108. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1109. teradataml/analytics/mle/Sampling.py +0 -549
  1110. teradataml/analytics/mle/Scale.py +0 -565
  1111. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1112. teradataml/analytics/mle/ScaleMap.py +0 -378
  1113. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1114. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1115. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1116. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1117. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1118. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1119. teradataml/analytics/mle/Sessionize.py +0 -475
  1120. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1121. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1122. teradataml/analytics/mle/TF.py +0 -389
  1123. teradataml/analytics/mle/TFIDF.py +0 -504
  1124. teradataml/analytics/mle/TextChunker.py +0 -414
  1125. teradataml/analytics/mle/TextClassifier.py +0 -399
  1126. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1127. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1128. teradataml/analytics/mle/TextMorph.py +0 -494
  1129. teradataml/analytics/mle/TextParser.py +0 -623
  1130. teradataml/analytics/mle/TextTagger.py +0 -530
  1131. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1132. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1133. teradataml/analytics/mle/Unpack.py +0 -526
  1134. teradataml/analytics/mle/Unpivot.py +0 -438
  1135. teradataml/analytics/mle/VarMax.py +0 -776
  1136. teradataml/analytics/mle/VectorDistance.py +0 -762
  1137. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1138. teradataml/analytics/mle/XGBoost.py +0 -842
  1139. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1140. teradataml/analytics/mle/__init__.py +0 -123
  1141. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1142. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1143. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1144. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1145. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1146. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1147. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1148. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1149. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1150. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1151. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1152. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1153. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1154. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1155. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1156. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1157. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1158. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1159. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1160. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1161. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1162. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1163. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1164. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1165. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1166. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1167. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1168. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1169. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1170. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1171. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1172. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1173. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1174. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1175. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1176. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1177. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1178. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1179. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1180. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1181. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1182. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1183. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1184. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1185. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1186. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1187. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1188. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1189. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1190. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1191. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1192. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1193. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1194. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1195. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1196. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1197. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1198. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1199. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1200. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1201. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1202. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1203. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1204. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1205. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1206. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1207. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1208. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1209. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1210. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1211. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1212. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1213. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1214. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1215. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1216. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1217. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1218. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1219. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1220. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1221. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1222. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1223. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1224. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1225. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1226. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1227. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1228. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1229. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1230. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1231. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1232. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1233. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1234. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1235. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1236. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1237. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1238. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1239. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1240. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1241. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1242. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1243. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1244. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1245. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1246. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1247. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1248. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1249. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1250. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1251. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1252. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1253. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1254. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1255. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1256. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1257. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1258. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1259. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1260. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1261. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1262. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1263. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1264. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1265. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1266. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1267. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1268. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1269. teradataml/analytics/sqle/Antiselect.py +0 -321
  1270. teradataml/analytics/sqle/Attribution.py +0 -603
  1271. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1272. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1273. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1274. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1275. teradataml/analytics/sqle/NPath.py +0 -632
  1276. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1277. teradataml/analytics/sqle/Pack.py +0 -388
  1278. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1279. teradataml/analytics/sqle/Sessionize.py +0 -390
  1280. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1281. teradataml/analytics/sqle/Unpack.py +0 -503
  1282. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1283. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1284. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1285. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1286. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1287. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1288. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1289. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1290. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1291. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1292. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1293. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1294. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1295. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1296. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1297. teradataml/catalog/model_cataloging.py +0 -980
  1298. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1299. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1300. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1301. teradataml/table_operators/sandbox_container_util.py +0 -643
  1302. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1303. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
@@ -1,1166 +1,1179 @@
1
- #!/usr/bin/python
2
- # ##################################################################
3
- #
4
- # Copyright 2019 Teradata. All rights reserved.
5
- # TERADATA CONFIDENTIAL AND TRADE SECRET
6
- #
7
- # Primary Owner: Rohit Khurd (rohit.khurd@teradata.com)
8
- # Secondary Owner: Abhinav Sahu (abhinav.sahu@teradata.com)
9
- #
10
- # This file implements APIs and utility functions for set operations.
11
- # ##################################################################
12
-
13
- import inspect, importlib
14
- from collections import OrderedDict
15
- from teradataml.common.exceptions import TeradataMlException
16
- from teradataml.common.messages import Messages
17
- from teradataml.common.messagecodes import MessageCodes
18
- from teradataml.common.utils import UtilFuncs
19
- from teradataml.dataframe import dataframe
20
- from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
21
- from teradataml.common.aed_utils import AedUtils
22
- from teradataml.utils.validators import _Validators
23
- from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
24
- from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
25
- from teradatasql import OperationalError
26
-
27
- module = importlib.import_module("teradataml")
28
-
29
- def __validate_setop_args(df_list, awu_matrix, setop_type):
30
- """
31
- DESCRIPTION:
32
- Internal function to check for the validity of the input arguments.
33
-
34
- PARAMETERS:
35
- df_list:
36
- Required argument.
37
- Specifies the list of teradataml DataFrames.
38
- Types: list of teradataml DataFrames
39
-
40
- awu_matrix:
41
- Required argument.
42
- Specifies the argument is expected to be a list of arguments, expected types are
43
- mentioned as type or tuple.
44
-
45
- setop_type:
46
- Required argument.
47
- Specifies the type of SET Operation to be performed.
48
- Types: str
49
-
50
- RAISES:
51
- TeradataMlException
52
-
53
- EXAMPLES:
54
- __validate_setop_args(df_list, awu_matrix, setop_type)
55
-
56
- """
57
- # Validate argument types
58
- _Validators._validate_function_arguments(awu_matrix)
59
-
60
- # Validate the number of dfs in df_list
61
- if len(df_list) < 2:
62
- raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_INVALID_DF_COUNT,
63
- setop_type),
64
- MessageCodes.SETOP_INVALID_DF_COUNT)
65
-
66
- # Validate if all items in df_list are DataFrames
67
- for i in range(len(df_list)):
68
- _Validators._validate_function_arguments([['df_list[{0}]'.format(i), df_list[i],
69
- False, (dataframe.DataFrame)]])
70
-
71
- # Validate number of columns for 'td_intersect' and 'td_minus'
72
- if setop_type in ['td_intersect', 'td_minus', 'td_except']:
73
- it = iter(df_list[i].columns for i in range(len(df_list)))
74
- the_len = len(next(it))
75
- if not all(len(l) == the_len for l in it):
76
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_DF_LENGTH),
77
- MessageCodes.INVALID_DF_LENGTH)
78
-
79
- def __check_concat_compatibility(df_list, join, sort, ignore_index):
80
- """
81
- DESCRIPTION:
82
- Internal function to check if the DataFrames are compatible for concat or not.
83
-
84
- PARAMETERS:
85
- df_list:
86
- Required argument.
87
- Specifies the list of teradataml DataFrames to be concatenated.
88
- Type: list of teradataml DataFrames
89
-
90
- join:
91
- Required argument.
92
- Specifies the type of join to use in concat ('inner' or 'outer').
93
- Type: str
94
-
95
- sort:
96
- Required argument.
97
- Specifies a flag to determine whether the columns should be sorted while being projected.
98
- Type: bool
99
-
100
- ignore_index:
101
- Required argument.
102
- Specifies whether to ignore the index columns in resulting DataFrame or not.
103
- Types: bool
104
-
105
- RETURNS:
106
- A tuple of the following form:
107
- (master_column_dict, is_lazy)
108
-
109
- where master_column_dict is a dictionary with the column names to project as a result as the keys,
110
- and is of the following form:
111
- {
112
- '<col_name_1>' : {
113
- 'col_present' : [True, False],
114
- 'col_type': <type>
115
- },
116
- '<col_name_2>' : {
117
- ...
118
- },
119
- ...
120
- }
121
-
122
- The value of the keys in the dictionary is again a dictionary with the following elements:
123
- 1. 'col_present': A list of booleans, the nth value in it indicating the columns presence in the nth DF.
124
- Presence specified by True, and absence by False,
125
- 2. 'col_type': The teradatasqlalchemy datatype of the column in the first DF that the column is present in,
126
-
127
- and 'is_lazy' is a boolean which indicates whether the result DataFrame creation should be a lazy operation
128
- or not, based on the column type compatibility.
129
-
130
- RAISES:
131
- None
132
-
133
- EXAMPLES:
134
- columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort)
135
- """
136
- dfs_to_operate_on = df_list
137
-
138
- # Initialize the return objects including a variable deciding whether the execution is lazy or not.
139
- # The execution will be non-lazy if the types of columns are not an exact match.
140
- # TODO: Add a set operation type compatibility matrix for use to make this operation completely lazy
141
- # https://jira.td.teradata.com/jira/browse/ELE-1913
142
-
143
- col_dict = OrderedDict()
144
- is_lazy = True
145
-
146
- # Iterate on all DFs to be applied for set operation.
147
- for df in dfs_to_operate_on:
148
- # Process each column in the DF of the iteration.
149
- for c in df._metaexpr.t.c:
150
- col_name = c.name
151
- # Process the column name if it is not already processed.
152
- # Processing of set operation is column name based so if the DF in the nth iteration had column 'xyz',
153
- # then the column with the same name in any DF in later iterations need not be processed.
154
- if col_name not in col_dict:
155
-                # For every column, its entry in the dictionary looks like:
156
- # '<column_name>' : { 'col_present' : [True, False], 'col_type': <type> }
157
- # where :
158
- # '<column_name>' : is the name of the column being processed.
159
- #
160
-                # Its value is yet another dictionary with keys:
161
- # 'col_present' : Its value is a list of booleans, the nth value in it indicating the
162
- # columns presence in the nth DF - presence specified by True,
163
- # and absence by False.
164
- # 'col_type' : Its value is the teradatasqlalchemy type of the column in the first DF
165
- # that the column is present in.
166
-
167
- # Generate a list of booleans, each value of it indicating the columns presence in the DF in the
168
- # dfs_to_operate_on list. If ignore_index is True then assign False so that we can ignore when
169
- # forming dict.
170
-
171
- col_present_in_dfs = []
172
- for inner_df in dfs_to_operate_on:
173
- col_present_in_df = None
174
- if ignore_index and inner_df.index and col_name in inner_df._index_label:
175
- col_present_in_df = False
176
- else:
177
- col_present_in_df = df_utils._check_column_exists(col_name, inner_df.columns)
178
- col_present_in_dfs.append(col_present_in_df)
179
-
180
- if join.upper() == 'INNER':
181
- # For inner join, column has to present in all DFs.
182
- if all(col_present_in_dfs):
183
- col_dict[col_name] = {}
184
-
185
- # Get the type of the column in all the DFs.
186
- col_types_in_dfs = [inner_df._metaexpr.t.c[col_name].type for inner_df in
187
- dfs_to_operate_on]
188
-
189
- # Populate the 'column_present' list using the col_present_in_dfs.
190
- col_dict[col_name]['col_present'] = col_present_in_dfs
191
- # The type to be used for the column is the one of the first DF it is present in.
192
- col_dict[col_name]['col_type'] = col_types_in_dfs[0]
193
-
194
- # If the type of the column in all DFs is not the same, then the operation is not lazy.
195
- if not all(ctype == col_dict[col_name]['col_type']
196
- for ctype in col_types_in_dfs):
197
- is_lazy = False
198
-
199
- elif join.upper() == 'OUTER':
200
- # If the column is marked as False for all DataFrames
201
- if not any(col_present_in_dfs):
202
- pass
203
- else:
204
- # For outer join, column need not be present in all DFs.
205
- col_dict[col_name] = {}
206
- # Get the type of the column in all the DFs. None for the DF it is not present in.
207
- col_types_in_dfs = [None if not present else inner_df._metaexpr.t.c[col_name].type
208
- for (inner_df, present) in zip(dfs_to_operate_on, col_present_in_dfs)]
209
-
210
- # Find the type of the column in the first DF it is present in.
211
- non_none_type_to_add = next(ctype for ctype in col_types_in_dfs if ctype is not None)
212
-
213
- # Populate the 'column_present' list using the col_present_in_dfs.
214
- col_dict[col_name]['col_present'] = col_present_in_dfs
215
- # The type to be used for the column is the one of the first DF it is present in.
216
- col_dict[col_name]['col_type'] = non_none_type_to_add
217
-
218
- # If the type of the column in all DFs is not the same, then the operation is not lazy.
219
- if not all(True if ctype is None else ctype == non_none_type_to_add
220
- for ctype in col_types_in_dfs):
221
- is_lazy = False
222
-
223
- # Sort if required
224
- if sort and join.upper() == 'OUTER':
225
- col_dict = OrderedDict(sorted(col_dict.items()))
226
-
227
- # If the result has no columns, i.e. no data
228
- if len(col_dict) < 1:
229
- raise TeradataMlException(Messages.get_message(MessageCodes.DF_WITH_NO_COLUMNS),
230
- MessageCodes.DF_WITH_NO_COLUMNS)
231
-
232
- return col_dict, is_lazy
233
-
234
- def __check_setop_if_lazy(df_list):
235
- """
236
- DESCRIPTION:
237
- Internal function to check if the teradataml DataFrames column types are compatible for
238
- any set operation or not.
239
-
240
- PARAMETERS:
241
- df_list:
242
- Required argument.
243
- Specifies the list of teradataml DataFrames.
244
- Types: list of teradataml DataFrames
245
-
246
- RETURNS:
247
- A boolean 'is_lazy' which indicates whether the result DataFrame creation should be a
248
- lazy operation or not.
249
-
250
- RAISES:
251
- None
252
-
253
- EXAMPLES:
254
- is_lazy = __check_setop_if_lazy(df_list)
255
- """
256
-
257
- # Initialize the return variable deciding whether the execution is lazy or not.
258
- # The execution will be non-lazy if the types of columns are not an exact match.
259
- is_lazy = True
260
-
261
- # Take first df's metadata for columns and then iterate for column_names on first DF which
262
- # has to be projected for any set operation.
263
- for i, col in enumerate(df_list[0]._metaexpr.t.c):
264
- for k in range(1, len(df_list)) :
265
- next_df_cols = df_list[k].columns
266
- next_df_type = df_list[k]._metaexpr.t.c[next_df_cols[i]].type
267
- if (type(next_df_type) != type(col.type)):
268
- is_lazy = False
269
-
270
- return is_lazy
271
-
272
- def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name = None):
273
- """
274
- DESCRIPTION:
275
- Internal function to process the columns as per given nodeid and setop_type, and
276
- return the result DataFrame.
277
-
278
- PARAMETERS:
279
- meta_data:
280
- Required argument.
281
- Specifies either a metaexpr for the first DataFrame or a dictionary with the
282
- column names as dictionary keys to be projected as a result. If a dict, the value
283
- of the keys in the dictionary is again a dictionary with the elements mentioning
284
- column presence and its type.
285
- Types: _MetaExpression, OrderedDict
286
-
287
- is_lazy:
288
- Required argument.
289
- Specifies a boolean based on the column type compatibility, indicating
290
- whether set operation is lazy or not.
291
- Types: bool
292
-
293
- setop_type:
294
- Required argument.
295
- Specifies the type of SET Operation to be performed.
296
- Types: str
297
-
298
- nodeid:
299
- Required argument.
300
-            Specifies the node id for the teradataml DataFrame.
301
-
302
- index_label:
303
- Required argument.
304
- Specifies list of index columns for teradataml DataFrame.
305
- Types: list
306
-
307
- index_to_use:
308
- Required argument.
309
- Specifies column(s) which can also be part of final index_label list.
310
- Types: list
311
-
312
- class_name:
313
- Optional argument.
314
- Specifies the name of the class for the first dataframe for deciding the
315
- return type of the output dataframe.
316
- Default: None
317
-            Types: str
318
-
319
- RETURNS:
320
- teradataml DataFrame
321
-
322
- RAISES:
323
- TeradataMlException
324
-
325
- EXAMPLES:
326
- >>> __process_operation(meta_data, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)
327
-
328
- """
329
-
330
- # Separate processing for concat and other set operators as concat has OrderedDict as metadata.
331
- if setop_type == 'concat':
332
- class_name = "DataFrame"
333
- column_info = list((col_name, meta_data[col_name]['col_type']) for col_name in meta_data)
334
- for col in column_info:
335
- if isinstance(col[1], (GEOMETRY, MBR, MBB)):
336
- class_name = "GeoDataFrame"
337
- break
338
-
339
- # Constructing new Metadata (_metaexpr) without DB; using dummy nodeid and get new metaexpr for nodeid.
340
- meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info) if is_lazy else meta_data
341
-
342
- if is_lazy:
343
- return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)
344
- else:
345
- try:
346
- # Execute node and get table_name to build DataFrame on.
347
- table_name = df_utils._execute_node_return_db_object_name(nodeid)
348
- return getattr(module, class_name).from_table(table_name, index_label=index_to_use)
349
- except TeradataMlException as err:
350
- # We should be here only because of failure caused in creating DF.
351
- # due to incompatible types, but a TeradataMLException is raised when DF creation fails.
352
- raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_COL_TYPE_MISMATCH, setop_type),
353
- MessageCodes.SETOP_COL_TYPE_MISMATCH) from err
354
- except OperationalError:
355
- raise
356
-
357
-
358
def concat(df_list, join='OUTER', allow_duplicates=True, sort=False, ignore_index=False):
    """
    DESCRIPTION:
        Concatenates a list of teradataml DataFrames, GeoDataFrames, or both
        along the index axis.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies a list of teradataml DataFrames and/or GeoDataFrames on
            which the concatenation is to be performed.
            Types: list of teradataml DataFrames and/or GeoDataFrames

        join:
            Optional argument.
            Specifies how to handle indexes on the columns axis.
            Supported values are:
                * 'OUTER': project all columns from all DataFrames; columns not
                  present in a DataFrame are filled with SQL NULL.
                * 'INNER': project only the columns common to all DataFrames.
            Default value: 'OUTER'
            Permitted values: 'INNER', 'OUTER'
            Types: str

        allow_duplicates:
            Optional argument.
            Specifies whether the result of concatenation can have duplicate rows.
            Default value: True
            Types: bool

        sort:
            Optional argument.
            Specifies whether to sort the columns axis when it is not already
            aligned and join is 'outer'.
            Default value: False
            Types: bool

        ignore_index:
            Optional argument.
            Specifies whether to ignore the index columns in the resulting
            DataFrame. When True, index columns do not take part in the concat.
            Default value: False
            Types: bool

    RETURNS:
        teradataml DataFrame if the result contains no geometry data,
        otherwise teradataml GeoDataFrame.

    RAISES:
        TeradataMlException

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "admissions_train")
        >>> from teradataml.dataframe import concat
        >>> df = DataFrame('admissions_train')
        >>> df1 = df[df.gpa == 4].select(['id', 'stats', 'masters', 'gpa'])
        >>> df2 = df[df.gpa < 2].select(['id', 'stats', 'programming', 'admitted'])
        >>> cdf = concat([df1, df2])
        >>> cdf = concat([df1, df2], join='inner')
        >>> cdf = concat([df1, df2], allow_duplicates=False)
        >>> cdf = concat([df1, df2], sort=True)
        >>> cdf = concat([df1, df2], ignore_index=True)
    """
    concat_join_permitted_values = ['INNER', 'OUTER']

    # Each validation entry is of the form:
    #   [arg_name, arg_value, is_optional, accepted_types,
    #    check_empty (optional), permitted_values (optional)]
    # where is_optional=False means the argument is required, check_empty=True
    # raises on empty values, and permitted_values restricts accepted values.
    awu_matrix = [
        ["df_list", df_list, False, (list)],
        ["join", join, True, (str), True, concat_join_permitted_values],
        ["allow_duplicates", allow_duplicates, False, (bool)],
        ["sort", sort, False, (bool)],
        ["ignore_index", ignore_index, False, (bool)],
    ]
    setop_type = 'concat'

    # Validate Set operator arguments.
    __validate_setop_args(df_list, awu_matrix, setop_type)

    # Work out the output columns/types and whether evaluation can stay lazy.
    master_columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort, ignore_index)

    try:
        aed_utils = AedUtils()

        # Pick the index_label from the first DataFrame whose index columns are
        # (at least partly) projected in the result; None when no DataFrame
        # contributes its index.
        index_label = None
        index_to_use = None
        for df in df_list:
            if df._index_label is None:
                continue
            if any(ind in master_columns_dict for ind in df._index_label):
                index_to_use = df._index_label
                # Keep only those index columns that actually get projected.
                index_label = [ind for ind in index_to_use if ind in master_columns_dict]
                break

        # When 'ignore_index' is set, drop the index information altogether.
        if ignore_index and index_to_use is not None:
            index_label = None
            index_to_use = None

        # One projection list per input DataFrame.
        col_list = [[] for _ in df_list]

        # Build the projected column expressions for every DataFrame.
        type_compiler = td_type_compiler(td_dialect)
        for col_name, col_info in master_columns_dict.items():
            for pos, projection in enumerate(col_list):
                if col_info['col_present'][pos]:
                    # NOTE: the processed name deliberately replaces col_name,
                    # so later DataFrames in this inner loop reuse it
                    # (behavior preserved from the original implementation).
                    col_name = UtilFuncs._process_for_teradata_keyword(col_name)
                    projection.append(col_name)
                else:
                    # Column absent in this DataFrame: project a typed NULL.
                    null_cast = 'CAST(NULL as {}) as {}'.format(
                        type_compiler.process(col_info['col_type']),
                        UtilFuncs._teradata_quote_arg(col_name, "\"", False))
                    projection.append(null_cast)

        input_table_columns = [','.join(projection) for projection in col_list]

        concat_nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
                                             'unionall' if allow_duplicates else 'union',
                                             input_table_columns)
        return __process_operation(master_columns_dict, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)

    except TeradataMlException:
        raise
    except Exception as err:
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
                                  MessageCodes.SETOP_FAILED) from err
-
689
def td_intersect(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        Intersects a list of teradataml DataFrames or GeoDataFrames along the
        index axis and returns a DataFrame with rows common to all inputs.
        Note:
            This function should be applied to data frames of the same type:
            either all teradataml DataFrames, or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on
            which the intersection is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies whether the result of intersection can have duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when intersect is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> from teradataml.dataframe.setop import td_intersect
        >>> df1 = DataFrame('setop_test1')
        >>> df2 = DataFrame('setop_test2')
        >>> idf = td_intersect([df1, df2])
        >>> idf = td_intersect([df1, df2], allow_duplicates=False)
        >>> # Intersecting more than two DataFrames.
        >>> df3 = df1[df1.gpa <= 3.5]
        >>> idf = td_intersect([df1, df2, df3])
    """
    # [arg_name, arg_value, is_optional, accepted_types]
    awu_matrix = [
        ["df_list", df_list, False, (list)],
        ["allow_duplicates", allow_duplicates, False, (bool)],
    ]
    setop_type = 'td_intersect'

    # Validate Set operator arguments.
    __validate_setop_args(df_list, awu_matrix, setop_type)

    return __process_setop_operation(df_list, allow_duplicates, setop_type, 'intersect')
-
827
def td_minus(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        Returns the rows that appear in the first teradataml DataFrame or
        GeoDataFrame and not in the other teradataml DataFrames or
        GeoDataFrames along the index axis.
        Note:
            This function should be applied to data frames of the same type:
            either all teradataml DataFrames, or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on
            which the minus operation is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies whether the result of the minus operation can have
            duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when operation is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> from teradataml.dataframe.setop import td_minus
        >>> df1 = DataFrame('setop_test1')
        >>> df2 = DataFrame('setop_test2')
        >>> idf = td_minus([df1[df1.id<55] , df2])
        >>> idf = td_minus([df1[df1.id<55] , df2], allow_duplicates=False)
        >>> # Applying minus on more than two DataFrames.
        >>> df3 = df1[df1.gpa <= 3.9]
        >>> idf = td_minus([df1, df2, df3])
    """
    # [arg_name, arg_value, is_optional, accepted_types]
    awu_matrix = [
        ["df_list", df_list, False, (list)],
        ["allow_duplicates", allow_duplicates, False, (bool)],
    ]
    # Report 'td_except' as the operation name when invoked through the
    # td_except() wrapper; the caller's function name is read off the stack.
    setop_type = 'td_except' if inspect.stack()[1][3] == 'td_except' else 'td_minus'
    operation = 'minus'

    # Validate Set operator arguments.
    __validate_setop_args(df_list, awu_matrix, setop_type)

    return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
-
964
def td_except(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        Returns the rows that appear in the first teradataml DataFrame or
        GeoDataFrame and not in the other teradataml DataFrames or
        GeoDataFrames along the index axis.
        Note:
            This function should be applied to data frames of the same type:
            either all teradataml DataFrames, or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on
            which the except operation is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies whether the result of the except operation can have
            duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when operation is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> from teradataml.dataframe.setop import td_except
        >>> df1 = DataFrame('setop_test1')
        >>> df2 = DataFrame('setop_test2')
        >>> idf = td_except([df1[df1.id<55] , df2])
        >>> idf = td_except([df1[df1.id<55] , df2], allow_duplicates=False)
        >>> # Applying except on more than two DataFrames.
        >>> df3 = df1[df1.gpa <= 3.9]
        >>> idf = td_except([df1, df2, df3])
    """
    # Delegate to td_minus directly from this frame: td_minus inspects its
    # caller's name to report 'td_except' in validation/error messages.
    return td_minus(df_list, allow_duplicates)
-
1091
def __process_setop_operation(df_list, allow_duplicates, setop_type, operation):
    """
    DESCRIPTION:
        Internal function to process a set operation and return the result
        DataFrame/GeoDataFrame.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames/GeoDataFrames on which
            the set operation is to be performed.
            Types: list of teradataml DataFrames

        allow_duplicates:
            Required argument.
            Specifies if the result of the set operation can have duplicate rows.
            Types: bool

        setop_type:
            Required argument.
            Specifies the set operation type used for error reporting
            (e.g. 'td_intersect', 'td_minus', 'td_except').
            Types: str

        operation:
            Required argument.
            Specifies the set operation name passed to the AED layer
            (e.g. 'intersect', 'minus').
            Types: str

    RETURNS:
        teradataml DataFrame/GeoDataFrame

    RAISES:
        TeradataMlException

    EXAMPLES:
        >>> __process_setop_operation(df_list, allow_duplicates, setop_type, operation)

    """

    # Check if set operation can be lazy or not.
    is_lazy = __check_setop_if_lazy(df_list)

    # Get the first DataFrame's metaexpr.
    first_df_metaexpr = df_list[0]._metaexpr

    try:
        aed_utils = AedUtils()
        # Build the comma-separated projection list for every input DataFrame,
        # quoting/escaping any Teradata reserved keywords.
        input_table_columns = []
        for df in df_list:
            processed = [UtilFuncs._process_for_teradata_keyword(col) for col in df.columns]
            input_table_columns.append(','.join(processed))

        # '<op>all' keeps duplicates (e.g. INTERSECT ALL); '<op>' removes them.
        nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
                                      '{}all'.format(operation) if allow_duplicates else operation,
                                      input_table_columns)

        # The result inherits the first DataFrame's index_label when present;
        # otherwise index_label is an empty list and index_to_use is None.
        index_to_use = df_list[0]._index_label
        index_label = index_to_use if index_to_use is not None else []

        class_name = df_list[0].__class__.__name__
        return __process_operation(first_df_metaexpr, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name)

    except TeradataMlException:
        raise
    except Exception as err:
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
                                  MessageCodes.SETOP_FAILED) from err
1
+ #!/usr/bin/python
2
+ # ##################################################################
3
+ #
4
+ # Copyright 2019 Teradata. All rights reserved.
5
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
6
+ #
7
+ # Primary Owner: Rohit Khurd (rohit.khurd@teradata.com
8
+ # Secondary Owner: Abhinav Sahu (abhinav.sahu@teradata.com)
9
+ #
10
+ # This file implements APIs and utility functions for set operations.
11
+ # ##################################################################
12
+
13
+ import inspect, importlib
14
+ from collections import OrderedDict
15
+ from teradataml.common.exceptions import TeradataMlException
16
+ from teradataml.common.messages import Messages
17
+ from teradataml.common.messagecodes import MessageCodes
18
+ from teradataml.common.utils import UtilFuncs
19
+ from teradataml.dataframe import dataframe
20
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
21
+ from teradataml.common.aed_utils import AedUtils
22
+ from teradataml.utils.validators import _Validators
23
+ from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
24
+ from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
25
+ from teradatasql import OperationalError
26
+
27
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband
28
+
29
+ module = importlib.import_module("teradataml")
30
+
31
def __validate_setop_args(df_list, awu_matrix, setop_type):
    """
    DESCRIPTION:
        Internal function to check for the validity of the input arguments.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames.
            Types: list of teradataml DataFrames

        awu_matrix:
            Required argument.
            Specifies a list of argument-validation entries; expected types are
            mentioned as type or tuple.

        setop_type:
            Required argument.
            Specifies the type of SET Operation to be performed.
            Types: str

    RAISES:
        TeradataMlException

    EXAMPLES:
        __validate_setop_args(df_list, awu_matrix, setop_type)

    """
    # Validate argument types.
    _Validators._validate_function_arguments(awu_matrix)

    # A set operation needs at least two DataFrames.
    if len(df_list) < 2:
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_INVALID_DF_COUNT,
                                                       setop_type),
                                  MessageCodes.SETOP_INVALID_DF_COUNT)

    # Every element of df_list must be a teradataml DataFrame.
    for pos, df in enumerate(df_list):
        _Validators._validate_function_arguments([['df_list[{0}]'.format(pos), df,
                                                   False, (dataframe.DataFrame)]])

    # 'td_intersect'/'td_minus'/'td_except' require all inputs to project the
    # same number of columns.
    if setop_type in ('td_intersect', 'td_minus', 'td_except'):
        expected_len = len(df_list[0].columns)
        if any(len(df.columns) != expected_len for df in df_list[1:]):
            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_DF_LENGTH),
                                      MessageCodes.INVALID_DF_LENGTH)
+
81
+
82
def __check_concat_compatibility(df_list, join, sort, ignore_index):
    """
    DESCRIPTION:
        Internal function to check if the DataFrames are compatible for concat or not.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames to be concatenated.
            Type: list of teradataml DataFrames

        join:
            Required argument.
            Specifies the type of join to use in concat ('inner' or 'outer').
            Type: str

        sort:
            Required argument.
            Specifies a flag to determine whether the columns should be sorted while being projected.
            Type: bool

        ignore_index:
            Required argument.
            Specifies whether to ignore the index columns in resulting DataFrame or not.
            Types: bool

    RETURNS:
        A tuple (master_column_dict, is_lazy), where master_column_dict maps every
        column name to project to a dictionary with:
            1. 'col_present': a list of booleans, the nth value indicating the column's
               presence (True) or absence (False) in the nth DataFrame,
            2. 'col_type': the teradatasqlalchemy type of the column in the first
               DataFrame the column is present in,
        and 'is_lazy' is a boolean indicating whether result DataFrame creation can be
        a lazy operation, based on column type compatibility.

    RAISES:
        None

    EXAMPLES:
        columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort)
    """
    # The operation turns non-lazy as soon as a column's type is not an exact
    # match across the inputs.
    # TODO: Add a set operation type compatibility matrix for use to make this operation completely lazy
    #       https://jira.td.teradata.com/jira/browse/ELE-1913
    col_dict = OrderedDict()
    is_lazy = True

    inner_join = join.upper() == 'INNER'
    outer_join = join.upper() == 'OUTER'

    for df in df_list:
        for c in df._metaexpr.t.c:
            col_name = c.name
            # The processing is column-name based: once a name has been handled
            # for any DataFrame, later occurrences are skipped.
            if col_name in col_dict:
                continue

            # Presence flags, one per input DataFrame. Index columns are treated
            # as absent when ignore_index is requested.
            col_present_in_dfs = []
            for inner_df in df_list:
                if ignore_index and inner_df.index and col_name in inner_df._index_label:
                    col_present_in_dfs.append(False)
                else:
                    col_present_in_dfs.append(
                        df_utils._check_column_exists(col_name, inner_df.columns))

            if inner_join:
                # Inner join projects only columns present in every DataFrame.
                if all(col_present_in_dfs):
                    col_types_in_dfs = [inner_df._metaexpr.t.c[col_name].type
                                        for inner_df in df_list]
                    # The projected type is the one from the first DataFrame.
                    col_dict[col_name] = {'col_present': col_present_in_dfs,
                                          'col_type': col_types_in_dfs[0]}
                    # Any type disagreement forces non-lazy execution.
                    if any(ctype != col_types_in_dfs[0] for ctype in col_types_in_dfs):
                        is_lazy = False

            elif outer_join:
                # Outer join projects a column as long as it appears somewhere.
                if any(col_present_in_dfs):
                    # None marks the DataFrames the column is absent from.
                    col_types_in_dfs = [inner_df._metaexpr.t.c[col_name].type if present else None
                                        for inner_df, present in zip(df_list, col_present_in_dfs)]

                    # The projected type comes from the first DataFrame the
                    # column is present in.
                    non_none_type_to_add = next(ctype for ctype in col_types_in_dfs
                                                if ctype is not None)

                    col_dict[col_name] = {'col_present': col_present_in_dfs,
                                          'col_type': non_none_type_to_add}

                    # Any type disagreement (ignoring absences) forces non-lazy execution.
                    if any(ctype is not None and ctype != non_none_type_to_add
                           for ctype in col_types_in_dfs):
                        is_lazy = False

    # Sort the projected columns if requested for an outer join.
    if sort and outer_join:
        col_dict = OrderedDict(sorted(col_dict.items()))

    # A result with no columns at all is an error.
    if len(col_dict) < 1:
        raise TeradataMlException(Messages.get_message(MessageCodes.DF_WITH_NO_COLUMNS),
                                  MessageCodes.DF_WITH_NO_COLUMNS)

    return col_dict, is_lazy
+
237
+
238
def __check_setop_if_lazy(df_list):
    """
    DESCRIPTION:
        Internal function to check if the teradataml DataFrames column types are
        compatible for any set operation or not.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames.
            Types: list of teradataml DataFrames

    RETURNS:
        A boolean 'is_lazy' which indicates whether the result DataFrame creation
        should be a lazy operation or not.

    RAISES:
        None

    EXAMPLES:
        is_lazy = __check_setop_if_lazy(df_list)
    """
    # Compare the type of each projected column of the first DataFrame with the
    # positionally-corresponding column of every other DataFrame. The original
    # implementation kept scanning all remaining columns and DataFrames after a
    # mismatch was found; the first mismatch already decides the answer, so
    # return False immediately instead.
    for i, col in enumerate(df_list[0]._metaexpr.t.c):
        for other_df in df_list[1:]:
            other_type = other_df._metaexpr.t.c[other_df.columns[i]].type
            if type(other_type) != type(col.type):
                return False

    # All positional column types matched - the operation can stay lazy.
    return True
+
276
+
277
def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name = None):
    """
    DESCRIPTION:
        Internal function to process the columns as per given nodeid and setop_type, and
        return the result DataFrame.

    PARAMETERS:
        meta_data:
            Required argument.
            Specifies either a metaexpr for the first DataFrame or a dictionary with the
            column names as dictionary keys to be projected as a result. If a dict, the value
            of the keys in the dictionary is again a dictionary with the elements mentioning
            column presence and its type.
            Types: _MetaExpression, OrderedDict

        is_lazy:
            Required argument.
            Specifies a boolean based on the column type compatibility, indicating
            whether set operation is lazy or not.
            Types: bool

        setop_type:
            Required argument.
            Specifies the type of SET Operation to be performed.
            Types: str

        nodeid:
            Required argument.
            node id for the teradataml DataFrame.

        index_label:
            Required argument.
            Specifies list of index columns for teradataml DataFrame.
            Types: list

        index_to_use:
            Required argument.
            Specifies column(s) which can also be part of final index_label list.
            Types: list

        class_name:
            Optional argument.
            Specifies the name of the class for the first dataframe for deciding the
            return type of the output dataframe.
            Default: None
            Types: String

    RETURNS:
        teradataml DataFrame

    RAISES:
        TeradataMlException

    EXAMPLES:
        >>> __process_operation(meta_data, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)
    """
    # concat passes an OrderedDict as meta_data; other set operators pass a
    # metaexpr and a pre-computed class_name, so they skip this branch.
    if setop_type == 'concat':
        column_info = [(col_name, meta_data[col_name]['col_type']) for col_name in meta_data]

        # Any geospatial column in the projection makes the result a GeoDataFrame.
        class_name = "DataFrame"
        if any(isinstance(col_type, (GEOMETRY, MBR, MBB)) for _, col_type in column_info):
            class_name = "GeoDataFrame"

        # For a lazy result, construct new metadata (_metaexpr) without touching
        # the DB, using the node id.
        if is_lazy:
            meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info)

    if is_lazy:
        return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)

    try:
        # Execute the node and build the result DataFrame on the persisted object.
        table_name = df_utils._execute_node_return_db_object_name(nodeid)
        return getattr(module, class_name).from_table(table_name, index_label=index_to_use)
    except TeradataMlException as err:
        # We should be here only because of failure caused in creating the DF
        # due to incompatible types, but a TeradataMLException is raised when
        # DF creation fails.
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_COL_TYPE_MISMATCH, setop_type),
                                  MessageCodes.SETOP_COL_TYPE_MISMATCH) from err
    except OperationalError:
        raise
+
362
+
363
@collect_queryband(queryband="concat")
def concat(df_list, join='OUTER', allow_duplicates=True, sort=False, ignore_index=False):
    """
    DESCRIPTION:
        Concatenates a list of teradataml DataFrames, GeoDataFrames, or both along the index axis.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies a list of teradataml DataFrames, GeoDataFrames, or both on which the
            concatenation is to be performed.
            Types: list of teradataml DataFrames and/or GeoDataFrames

        join:
            Optional argument.
            Specifies how to handle indexes on columns axis.
            Supported values are:
                * 'OUTER': It instructs the function to project all columns from all the DataFrames.
                           Columns not present in any DataFrame will have a SQL NULL value.
                * 'INNER': It instructs the function to project only the columns common to all DataFrames.
            Default value: 'OUTER'
            Permitted values: 'INNER', 'OUTER'
            Types: str

        allow_duplicates:
            Optional argument.
            Specifies if the result of concatenation can have duplicate rows.
            Default value: True
            Types: bool

        sort:
            Optional argument.
            Specifies a flag to sort the columns axis if it is not already aligned when
            the join argument is set to 'outer'.
            Default value: False
            Types: bool

        ignore_index:
            Optional argument.
            Specifies whether to ignore the index columns in resulting DataFrame or not.
            If True, then index columns will be ignored in the concat operation.
            Default value: False
            Types: bool

    RETURNS:
        teradataml DataFrame, if result does not contain any geometry data, otherwise
        returns teradataml GeoDataFrame.

    RAISES:
        TeradataMlException

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "admissions_train")
        >>> from teradataml.dataframe import concat
        >>>
        >>> df = DataFrame('admissions_train')
        >>> df1 = df[df.gpa == 4].select(['id', 'stats', 'masters', 'gpa'])
        >>> df2 = df[df.gpa < 2].select(['id', 'stats', 'programming', 'admitted'])
        >>> cdf = concat([df1, df2])
        >>> cdf
              stats masters  gpa programming admitted
        id
        19  Advanced    None  NaN    Advanced        0
        24  Advanced    None  NaN      Novice        1
        13  Advanced      no  4.0        None     None
        29    Novice     yes  4.0        None     None
        15  Advanced     yes  4.0        None     None
        >>>
        >>> # join = 'inner' projects only the common columns.
        >>> cdf = concat([df1, df2], join='inner')
        >>> # allow_duplicates = False removes duplicate rows from the result.
        >>> cdf = concat([cdf, df2], allow_duplicates=False)
        >>> # sort = True sorts the projected columns alphabetically.
        >>> cdf = concat([df1, df2], sort=True)
        >>> # ignore_index = True drops the index columns from the result.
        >>> cdf = concat([df1, df2], ignore_index=True)
    """
    concat_join_permitted_values = ['INNER', 'OUTER']

    # Argument validation matrix. Each row is:
    #   [arg_name, arg_value, is_optional, accepted_types,
    #    check_empty (optional), permitted_values (optional)]
    # where is_optional=False means required, check_empty=True raises on empty
    # values, and permitted_values restricts the accepted values.
    awu_matrix = [
        ["df_list", df_list, False, (list)],
        ["join", join, True, (str), True, concat_join_permitted_values],
        ["allow_duplicates", allow_duplicates, False, (bool)],
        ["sort", sort, False, (bool)],
        ["ignore_index", ignore_index, False, (bool)],
    ]
    setop_type = 'concat'

    # Validate Set operator arguments.
    __validate_setop_args(df_list, awu_matrix, setop_type)

    # Generate the columns and their type to output, and check if the evaluation
    # has to be lazy.
    master_columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort, ignore_index)

    try:
        aed_utils = AedUtils()

        # Set the index to the first df (in list order) whose index_label columns
        # are being projected; None if no df's index columns are projected.
        index_label = None
        index_to_use = None
        for df in df_list:
            if df._index_label is not None and any(ind_col in master_columns_dict for ind_col in df._index_label):
                index_label = []
                index_to_use = df._index_label
                break

        if index_to_use is not None:
            index_label = [ind_col for ind_col in index_to_use
                           if ind_col in master_columns_dict]

        # Drop the index information entirely if 'ignore_index' is set.
        if ignore_index and index_to_use is not None:
            index_label = None
            index_to_use = None

        # One projection list per input DataFrame.
        col_list = [[] for _ in df_list]

        # Build the projected column expression for every DataFrame.
        type_compiler = td_type_compiler(td_dialect)
        for col_name, value in master_columns_dict.items():
            for i, present in enumerate(value['col_present']):
                if not present:
                    # Column absent from this DataFrame: project a typed NULL.
                    col_list[i].append('CAST(NULL as {}) as {}'.format(
                        type_compiler.process(value['col_type']),
                        UtilFuncs._teradata_quote_arg(col_name, "\"", False)))
                else:
                    # Bug fix: do not reassign the loop variable 'col_name' here.
                    # The original code wrote the keyword-processed name back into
                    # 'col_name', leaking it into later iterations of this inner
                    # loop so the NULL-cast branch could quote an already
                    # processed name for subsequent DataFrames.
                    col_list[i].append(UtilFuncs._process_for_teradata_keyword(col_name))

        input_table_columns = [','.join(cols) for cols in col_list]

        concat_nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
                                             'unionall' if allow_duplicates else 'union',
                                             input_table_columns)
        return __process_operation(master_columns_dict, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)

    except TeradataMlException:
        raise
    except Exception as err:
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
                                  MessageCodes.SETOP_FAILED) from err
+
695
+
696
@collect_queryband(queryband="tdIntersect")
def td_intersect(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        Function intersects a list of teradataml DataFrames or GeoDataFrames along the index axis and
        returns a DataFrame with rows common to all input DataFrames.
        Note:
            This function should be applied to data frames of the same type: either all teradataml
            DataFrames, or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on which the intersection
            is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies if the result of intersection can have duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when intersect is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> from teradataml.dataframe.setop import td_intersect
        >>>
        >>> df1 = DataFrame('setop_test1')
        >>> df2 = DataFrame('setop_test2')
        >>> idf = td_intersect([df1, df2])
        >>> # Drop duplicate rows from the result.
        >>> idf = td_intersect([df1, df2], allow_duplicates=False)
        >>> # Intersect more than two DataFrames.
        >>> df3 = df1[df1.gpa <= 3.5]
        >>> idf = td_intersect([df1, df2, df3])
    """
    setop_type = 'td_intersect'
    operation = 'intersect'

    # Argument validation matrix:
    # [arg_name, arg_value, is_optional, accepted_types].
    awu_matrix = [
        ["df_list", df_list, False, (list)],
        ["allow_duplicates", allow_duplicates, False, (bool)],
    ]

    # Validate Set operator arguments.
    __validate_setop_args(df_list, awu_matrix, setop_type)

    # Delegate the actual INTERSECT processing to the shared set-op helper.
    return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
+
835
+
836
+ @collect_queryband(queryband="tdMinus")
837
+ def td_minus(df_list, allow_duplicates=True):
838
+ """
839
+ DESCRIPTION:
840
+ This function returns the resulting rows that appear in first teradataml DataFrame or GeoDataFrame
841
+ and not in other teradataml DataFrames or GeoDataFrames along the index axis.
842
+ Note:
843
+ This function should be applied to data frames of the same type: either all teradataml DataFrames,
844
+ or all GeoDataFrames.
845
+
846
+ PARAMETERS:
847
+ df_list:
848
+ Required argument.
849
+ Specifies the list of teradataml DataFrames or GeoDataFrames on which the minus
850
+ operation is to be performed.
851
+ Types: list of teradataml DataFrames or GeoDataFrames
852
+
853
+ allow_duplicates:
854
+ Optional argument.
855
+ Specifies if the result of minus operation can have duplicate rows.
856
+ Default value: True
857
+ Types: bool
858
+
859
+ RETURNS:
860
+ teradataml DataFrame when operation is performed on teradataml DataFrames.
861
+ teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.
862
+
863
+ RAISES:
864
+ TeradataMlException, TypeError
865
+
866
+ EXAMPLES:
867
+ >>> from teradataml import load_example_data
868
+ >>> load_example_data("dataframe", "setop_test1")
869
+ >>> load_example_data("dataframe", "setop_test2")
870
+ >>> load_example_data("geodataframe", ["sample_shapes"])
871
+ >>> from teradataml.dataframe.setop import td_minus
872
+ >>>
873
+ >>> df1 = DataFrame('setop_test1')
874
+ >>> df1
875
+ masters gpa stats programming admitted
876
+ id
877
+ 62 no 3.70 Advanced Advanced 1
878
+ 53 yes 3.50 Beginner Novice 1
879
+ 69 no 3.96 Advanced Advanced 1
880
+ 61 yes 4.00 Advanced Advanced 1
881
+ 58 no 3.13 Advanced Advanced 1
882
+ 51 yes 3.76 Beginner Beginner 0
883
+ 68 no 1.87 Advanced Novice 1
884
+ 66 no 3.87 Novice Beginner 1
885
+ 60 no 4.00 Advanced Novice 1
886
+ 59 no 3.65 Novice Novice 1
887
+ >>> df2 = DataFrame('setop_test2')
888
+ >>> df2
889
+ masters gpa stats programming admitted
890
+ id
891
+ 12 no 3.65 Novice Novice 1
892
+ 15 yes 4.00 Advanced Advanced 1
893
+ 14 yes 3.45 Advanced Advanced 0
894
+ 20 yes 3.90 Advanced Advanced 1
895
+ 18 yes 3.81 Advanced Advanced 1
896
+ 17 no 3.83 Advanced Advanced 1
897
+ 13 no 4.00 Advanced Novice 1
898
+ 11 no 3.13 Advanced Advanced 1
899
+ 60 no 4.00 Advanced Novice 1
900
+ 19 yes 1.98 Advanced Advanced 0
901
+ >>> idf = td_minus([df1[df1.id<55] , df2])
902
+ >>> idf
903
+ masters gpa stats programming admitted
904
+ id
905
+ 51 yes 3.76 Beginner Beginner 0
906
+ 50 yes 3.95 Beginner Beginner 0
907
+ 54 yes 3.50 Beginner Advanced 1
908
+ 52 no 3.70 Novice Beginner 1
909
+ 53 yes 3.50 Beginner Novice 1
910
+ 53 yes 3.50 Beginner Novice 1
911
+ >>>
912
+ >>> idf = td_minus([df1[df1.id<55] , df2], allow_duplicates=False)
913
+ >>> idf
914
+ masters gpa stats programming admitted
915
+ id
916
+ 54 yes 3.50 Beginner Advanced 1
917
+ 51 yes 3.76 Beginner Beginner 0
918
+ 53 yes 3.50 Beginner Novice 1
919
+ 50 yes 3.95 Beginner Beginner 0
920
+ 52 no 3.70 Novice Beginner 1
921
+ >>> # applying minus on more than two DataFrames
922
+ >>> df3 = df1[df1.gpa <= 3.9]
923
+ >>> idf = td_minus([df1, df2, df3])
924
+ >>> idf
925
+ masters gpa stats programming admitted
926
+ id
927
+ 61 yes 4.00 Advanced Advanced 1
928
+ 50 yes 3.95 Beginner Beginner 0
929
+ 69 no 3.96 Advanced Advanced 1
930
+
931
+ # td_minus on GeoDataFrame
932
+ >>> geo_dataframe = GeoDataFrame('sample_shapes')
933
+ >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
934
+ >>> geo_dataframe1
935
+ skey linestrings
936
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
937
+
938
+ >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
939
+ >>> geo_dataframe2
940
+ skey linestrings
941
+ 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
942
+ 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
943
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
944
+ 1002 LINESTRING (1 3,3 0,0 1)
945
+ 1001 LINESTRING (1 1,2 2,3 3,4 4)
946
+ 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
947
+ 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
948
+ 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
949
+ 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
950
+
951
+ >>> td_minus([geo_dataframe2,geo_dataframe1])
952
+ linestrings
953
+ skey
954
+ 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
955
+ 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
956
+ 1002 LINESTRING (1 3,3 0,0 1)
957
+ 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
958
+ 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
959
+ 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
960
+ 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
961
+ 1001 LINESTRING (1 1,2 2,3 3,4 4)
962
+ """
963
+ awu_matrix = []
964
+ awu_matrix.append(["df_list", df_list, False, (list)])
965
+ awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
966
+ setop_type = 'td_except' if (inspect.stack()[2][3] and inspect.stack()[2][3] == 'td_except') else 'td_minus'
967
+ operation = 'minus'
968
+
969
+ # Validate Set operator arguments
970
+ __validate_setop_args(df_list, awu_matrix, setop_type)
971
+
972
+ return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
973
+
974
+
975
@collect_queryband(queryband="tdExcept")
def td_except(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        This function returns the resulting rows that appear in first teradataml DataFrame or GeoDataFrame
        and not in other teradataml DataFrames or GeoDataFrames along the index axis.
        Note:
            This function should be applied to data frames of the same type: either all teradataml
            DataFrames, or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on which the except
            operation is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies if the result of except operation can have duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when operation is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> load_example_data("geodataframe", ["sample_shapes"])
        >>> from teradataml.dataframe.setop import td_except
        >>>
        >>> df1 = DataFrame('setop_test1')
        >>> df2 = DataFrame('setop_test2')
        >>> # Rows of df1 (restricted to id < 55) that are not present in df2,
        >>> # duplicates retained.
        >>> idf = td_except([df1[df1.id<55] , df2])
        >>> idf
           masters   gpa     stats programming admitted
        id
        51     yes  3.76  Beginner    Beginner        0
        50     yes  3.95  Beginner    Beginner        0
        54     yes  3.50  Beginner    Advanced        1
        52      no  3.70    Novice    Beginner        1
        53     yes  3.50  Beginner      Novice        1
        53     yes  3.50  Beginner      Novice        1
        >>>
        >>> # Same operation with duplicates removed.
        >>> idf = td_except([df1[df1.id<55] , df2], allow_duplicates=False)
        >>> idf
           masters   gpa     stats programming admitted
        id
        54     yes  3.50  Beginner    Advanced        1
        51     yes  3.76  Beginner    Beginner        0
        53     yes  3.50  Beginner      Novice        1
        50     yes  3.95  Beginner    Beginner        0
        52      no  3.70    Novice    Beginner        1
        >>> # applying except on more than two DataFrames
        >>> df3 = df1[df1.gpa <= 3.9]
        >>> idf = td_except([df1, df2, df3])
        >>> idf
           masters   gpa     stats programming admitted
        id
        61     yes  4.00  Advanced    Advanced        1
        50     yes  3.95  Beginner    Beginner        0
        69      no  3.96  Advanced    Advanced        1

        # td_except on GeoDataFrames
        >>> geo_dataframe = GeoDataFrame('sample_shapes')
        >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
        >>> geo_dataframe1
        skey	linestrings
        1004	LINESTRING (10 20 30,40 50 60,70 80 80)

        >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
        >>> td_except([geo_dataframe2,geo_dataframe1])
        skey	linestrings
        1008	MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
        1003	LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
        1005	LINESTRING (1 3 6,3 0 6,6 0 1)
        1006	LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
        1009	MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
        1001	LINESTRING (1 1,2 2,3 3,4 4)
        1007	MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
        1002	LINESTRING (1 3,3 0,0 1)
    """
    # Delegate straight to td_minus: EXCEPT and MINUS are the same operation.
    # NOTE: td_minus inspects the call stack for a caller frame named
    # 'td_except' to label the operation, so this call must stay directly
    # inside this function body.
    return td_minus(df_list, allow_duplicates=allow_duplicates)
1102
+
1103
+
1104
+ def __process_setop_operation(df_list, allow_duplicates, setop_type, operation):
1105
+ """
1106
+ DESCRIPTION:
1107
+ Internal function to process set opertaion and return the result DataFrame/GeoDataFrame.
1108
+
1109
+ PARAMETERS:
1110
+ df_list:
1111
+ Required argument.
1112
+ Specifies the list of teradataml DataFrames/GeoDataFrames on which the except
1113
+ operation is to be performed.
1114
+ Types: list of teradataml DataFrames
1115
+
1116
+ allow_duplicates:
1117
+ Optional argument.
1118
+ Specifies if the result of except operation can have duplicate rows.
1119
+ Default value: True
1120
+ Types: bool
1121
+
1122
+ setop_type:
1123
+ Required argument.
1124
+ Specifies set opertaion.
1125
+ Types: str
1126
+
1127
+ operation:
1128
+ Required argument.
1129
+ Specifies set opertaion name.
1130
+ Types: str
1131
+
1132
+ RETURNS:
1133
+ teradataml DataFrame/GeoDataFrame
1134
+
1135
+ RAISES:
1136
+ TeradataMlException
1137
+
1138
+ EXAMPLES:
1139
+ >>> __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
1140
+
1141
+ """
1142
+
1143
+ # Check if set operation can be lazy or not
1144
+ is_lazy = __check_setop_if_lazy(df_list)
1145
+
1146
+ # Get the first DataFrame's metaexpr
1147
+ first_df_metaexpr = df_list[0]._metaexpr
1148
+
1149
+ try:
1150
+ aed_utils = AedUtils()
1151
+ input_table_columns = []
1152
+ for i in range(len(df_list)):
1153
+ col_list = []
1154
+ for j in range(len(df_list[i].columns)):
1155
+ col_list.append(UtilFuncs._process_for_teradata_keyword(df_list[i].columns[j]))
1156
+
1157
+ input_table_columns.append(','.join(col_list))
1158
+
1159
+ nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
1160
+ '{}all'.format(operation) if allow_duplicates else '{}'.format(operation),
1161
+ input_table_columns)
1162
+
1163
+ # Set the index_label to columns in first df's index_label if it is not None,
1164
+ # else set it to None i.e. no index_label.
1165
+ index_label = []
1166
+ index_to_use = None
1167
+ index_to_use = df_list[0]._index_label if df_list[0]._index_label is not None else None
1168
+
1169
+ if index_to_use is not None:
1170
+ index_label = index_to_use
1171
+
1172
+ class_name = df_list[0].__class__.__name__
1173
+ return __process_operation(first_df_metaexpr, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name)
1174
+
1175
+ except TeradataMlException:
1176
+ raise
1177
+ except Exception as err:
1178
+ raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
1179
+ MessageCodes.SETOP_FAILED) from err