teradataml-17.20.0.7-py3-none-any.whl → teradataml-20.0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (1285)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1864 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2013 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +804 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1628 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +993 -0
  31. teradataml/automl/data_transformation.py +727 -0
  32. teradataml/automl/feature_engineering.py +1648 -0
  33. teradataml/automl/feature_exploration.py +547 -0
  34. teradataml/automl/model_evaluation.py +163 -0
  35. teradataml/automl/model_training.py +887 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/pkce_client.py +481 -481
  41. teradataml/common/aed_utils.py +6 -2
  42. teradataml/common/bulk_exposed_utils.py +111 -111
  43. teradataml/common/constants.py +1433 -1441
  44. teradataml/common/deprecations.py +160 -0
  45. teradataml/common/exceptions.py +73 -73
  46. teradataml/common/formula.py +742 -742
  47. teradataml/common/garbagecollector.py +592 -635
  48. teradataml/common/messagecodes.py +422 -431
  49. teradataml/common/messages.py +227 -231
  50. teradataml/common/sqlbundle.py +693 -693
  51. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  52. teradataml/common/utils.py +2418 -2500
  53. teradataml/common/warnings.py +25 -25
  54. teradataml/common/wrapper_utils.py +1 -110
  55. teradataml/config/dummy_file1.cfg +4 -4
  56. teradataml/config/dummy_file2.cfg +2 -2
  57. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  58. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  59. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  60. teradataml/context/aed_context.py +217 -217
  61. teradataml/context/context.py +1071 -999
  62. teradataml/data/A_loan.csv +19 -19
  63. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  64. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  65. teradataml/data/B_loan.csv +49 -49
  66. teradataml/data/BuoyData2.csv +17 -17
  67. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  68. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  69. teradataml/data/Convolve2RealsLeft.csv +5 -5
  70. teradataml/data/Convolve2RealsRight.csv +5 -5
  71. teradataml/data/Convolve2ValidLeft.csv +11 -11
  72. teradataml/data/Convolve2ValidRight.csv +11 -11
  73. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  74. teradataml/data/Orders1_12mf.csv +24 -24
  75. teradataml/data/Pi_loan.csv +7 -7
  76. teradataml/data/SMOOTHED_DATA.csv +7 -7
  77. teradataml/data/TestDFFT8.csv +9 -9
  78. teradataml/data/TestRiver.csv +109 -109
  79. teradataml/data/Traindata.csv +28 -28
  80. teradataml/data/acf.csv +17 -17
  81. teradataml/data/adaboost_example.json +34 -34
  82. teradataml/data/adaboostpredict_example.json +24 -24
  83. teradataml/data/additional_table.csv +10 -10
  84. teradataml/data/admissions_test.csv +21 -21
  85. teradataml/data/admissions_train.csv +41 -41
  86. teradataml/data/admissions_train_nulls.csv +41 -41
  87. teradataml/data/ageandheight.csv +13 -13
  88. teradataml/data/ageandpressure.csv +31 -31
  89. teradataml/data/antiselect_example.json +36 -36
  90. teradataml/data/antiselect_input.csv +8 -8
  91. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  92. teradataml/data/applicant_external.csv +6 -6
  93. teradataml/data/applicant_reference.csv +6 -6
  94. teradataml/data/arima_example.json +9 -9
  95. teradataml/data/assortedtext_input.csv +8 -8
  96. teradataml/data/attribution_example.json +33 -33
  97. teradataml/data/attribution_sample_table.csv +27 -27
  98. teradataml/data/attribution_sample_table1.csv +6 -6
  99. teradataml/data/attribution_sample_table2.csv +11 -11
  100. teradataml/data/bank_churn.csv +10001 -0
  101. teradataml/data/bank_web_clicks1.csv +42 -42
  102. teradataml/data/bank_web_clicks2.csv +91 -91
  103. teradataml/data/bank_web_url.csv +85 -85
  104. teradataml/data/barrier.csv +2 -2
  105. teradataml/data/barrier_new.csv +3 -3
  106. teradataml/data/betweenness_example.json +13 -13
  107. teradataml/data/bin_breaks.csv +8 -8
  108. teradataml/data/bin_fit_ip.csv +3 -3
  109. teradataml/data/binary_complex_left.csv +11 -11
  110. teradataml/data/binary_complex_right.csv +11 -11
  111. teradataml/data/binary_matrix_complex_left.csv +21 -21
  112. teradataml/data/binary_matrix_complex_right.csv +21 -21
  113. teradataml/data/binary_matrix_real_left.csv +21 -21
  114. teradataml/data/binary_matrix_real_right.csv +21 -21
  115. teradataml/data/blood2ageandweight.csv +26 -26
  116. teradataml/data/bmi.csv +501 -0
  117. teradataml/data/boston.csv +507 -507
  118. teradataml/data/buoydata_mix.csv +11 -11
  119. teradataml/data/burst_data.csv +5 -5
  120. teradataml/data/burst_example.json +20 -20
  121. teradataml/data/byom_example.json +17 -17
  122. teradataml/data/bytes_table.csv +3 -3
  123. teradataml/data/cal_housing_ex_raw.csv +70 -70
  124. teradataml/data/callers.csv +7 -7
  125. teradataml/data/calls.csv +10 -10
  126. teradataml/data/cars_hist.csv +33 -33
  127. teradataml/data/cat_table.csv +24 -24
  128. teradataml/data/ccm_example.json +31 -31
  129. teradataml/data/ccm_input.csv +91 -91
  130. teradataml/data/ccm_input2.csv +13 -13
  131. teradataml/data/ccmexample.csv +101 -101
  132. teradataml/data/ccmprepare_example.json +8 -8
  133. teradataml/data/ccmprepare_input.csv +91 -91
  134. teradataml/data/cfilter_example.json +12 -12
  135. teradataml/data/changepointdetection_example.json +18 -18
  136. teradataml/data/changepointdetectionrt_example.json +8 -8
  137. teradataml/data/chi_sq.csv +2 -2
  138. teradataml/data/churn_data.csv +14 -14
  139. teradataml/data/churn_emission.csv +35 -35
  140. teradataml/data/churn_initial.csv +3 -3
  141. teradataml/data/churn_state_transition.csv +5 -5
  142. teradataml/data/citedges_2.csv +745 -745
  143. teradataml/data/citvertices_2.csv +1210 -1210
  144. teradataml/data/clicks2.csv +16 -16
  145. teradataml/data/clickstream.csv +12 -12
  146. teradataml/data/clickstream1.csv +11 -11
  147. teradataml/data/closeness_example.json +15 -15
  148. teradataml/data/complaints.csv +21 -21
  149. teradataml/data/complaints_mini.csv +3 -3
  150. teradataml/data/complaints_testtoken.csv +224 -224
  151. teradataml/data/complaints_tokens_test.csv +353 -353
  152. teradataml/data/complaints_traintoken.csv +472 -472
  153. teradataml/data/computers_category.csv +1001 -1001
  154. teradataml/data/computers_test1.csv +1252 -1252
  155. teradataml/data/computers_train1.csv +5009 -5009
  156. teradataml/data/computers_train1_clustered.csv +5009 -5009
  157. teradataml/data/confusionmatrix_example.json +9 -9
  158. teradataml/data/conversion_event_table.csv +3 -3
  159. teradataml/data/corr_input.csv +17 -17
  160. teradataml/data/correlation_example.json +11 -11
  161. teradataml/data/coxhazardratio_example.json +39 -39
  162. teradataml/data/coxph_example.json +15 -15
  163. teradataml/data/coxsurvival_example.json +28 -28
  164. teradataml/data/cpt.csv +41 -41
  165. teradataml/data/credit_ex_merged.csv +45 -45
  166. teradataml/data/customer_loyalty.csv +301 -301
  167. teradataml/data/customer_loyalty_newseq.csv +31 -31
  168. teradataml/data/dataframe_example.json +146 -146
  169. teradataml/data/decisionforest_example.json +37 -37
  170. teradataml/data/decisionforestpredict_example.json +38 -38
  171. teradataml/data/decisiontree_example.json +21 -21
  172. teradataml/data/decisiontreepredict_example.json +45 -45
  173. teradataml/data/dfft2_size4_real.csv +17 -17
  174. teradataml/data/dfft2_test_matrix16.csv +17 -17
  175. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  176. teradataml/data/diabetes.csv +443 -443
  177. teradataml/data/diabetes_test.csv +89 -89
  178. teradataml/data/dict_table.csv +5 -5
  179. teradataml/data/docperterm_table.csv +4 -4
  180. teradataml/data/docs/__init__.py +1 -1
  181. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  182. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  183. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  184. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  185. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  186. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  187. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  188. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  189. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  190. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  191. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  192. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  193. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  194. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  195. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  196. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  197. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  198. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  199. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  200. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  201. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  202. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  203. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  204. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  205. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  206. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  207. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  208. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  209. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +132 -132
  210. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +103 -103
  211. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  212. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +101 -101
  213. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  214. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  215. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  216. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  217. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  218. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  219. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  220. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  221. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  222. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  223. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  224. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  225. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  226. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  227. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  228. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  229. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  230. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  231. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  232. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  233. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  234. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +126 -126
  235. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  236. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  237. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  238. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  239. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  240. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  241. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  242. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  243. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +243 -243
  244. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  245. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  246. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  247. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  248. teradataml/data/docs/sqle/docs_17_20/FTest.py +160 -160
  249. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  250. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  251. teradataml/data/docs/sqle/docs_17_20/GLM.py +380 -380
  252. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  253. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  254. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  255. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +123 -123
  256. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  257. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  258. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  259. teradataml/data/docs/sqle/docs_17_20/KMeans.py +204 -204
  260. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  261. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  262. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  263. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  264. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  265. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  266. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  267. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  268. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +117 -117
  269. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  270. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  271. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  272. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  273. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +225 -225
  274. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +115 -115
  275. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  276. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  277. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  278. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  279. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  280. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  281. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  282. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  283. teradataml/data/docs/sqle/docs_17_20/ROC.py +163 -163
  284. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  285. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  286. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  287. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  288. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  289. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  290. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  291. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  292. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +202 -202
  293. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  294. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +197 -197
  295. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +110 -109
  296. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  297. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  298. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  299. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  300. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  301. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  302. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  303. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  304. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +171 -171
  305. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  306. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  307. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  308. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  309. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  310. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  311. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  312. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  313. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  314. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  315. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  316. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +353 -353
  317. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +275 -275
  318. teradataml/data/docs/sqle/docs_17_20/ZTest.py +155 -155
  319. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  320. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  321. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  322. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  323. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  324. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  325. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  326. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  327. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  328. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  329. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  330. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  331. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  332. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  333. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  334. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  335. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  336. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  337. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  338. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  339. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  340. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  341. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  342. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  343. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  344. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  345. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  346. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  347. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  348. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  349. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  350. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  351. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  352. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  353. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  354. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  355. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  356. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  357. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  358. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  359. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  360. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  361. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  362. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  363. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  364. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  365. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  366. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  367. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  368. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  369. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  370. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  371. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  372. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  373. teradataml/data/dtw_example.json +17 -17
  374. teradataml/data/dtw_t1.csv +11 -11
  375. teradataml/data/dtw_t2.csv +4 -4
  376. teradataml/data/dwt2d_example.json +15 -15
  377. teradataml/data/dwt_example.json +14 -14
  378. teradataml/data/dwt_filter_dim.csv +5 -5
  379. teradataml/data/emission.csv +9 -9
  380. teradataml/data/emp_table_by_dept.csv +19 -19
  381. teradataml/data/employee_info.csv +4 -4
  382. teradataml/data/employee_table.csv +6 -6
  383. teradataml/data/excluding_event_table.csv +2 -2
  384. teradataml/data/finance_data.csv +6 -6
  385. teradataml/data/finance_data2.csv +61 -61
  386. teradataml/data/finance_data3.csv +93 -93
  387. teradataml/data/fish.csv +160 -0
  388. teradataml/data/fm_blood2ageandweight.csv +26 -26
  389. teradataml/data/fmeasure_example.json +11 -11
  390. teradataml/data/followers_leaders.csv +10 -10
  391. teradataml/data/fpgrowth_example.json +12 -12
  392. teradataml/data/frequentpaths_example.json +29 -29
  393. teradataml/data/friends.csv +9 -9
  394. teradataml/data/fs_input.csv +33 -33
  395. teradataml/data/fs_input1.csv +33 -33
  396. teradataml/data/genData.csv +513 -513
  397. teradataml/data/geodataframe_example.json +39 -39
  398. teradataml/data/glass_types.csv +215 -0
  399. teradataml/data/glm_admissions_model.csv +12 -12
  400. teradataml/data/glm_example.json +29 -29
  401. teradataml/data/glml1l2_example.json +28 -28
  402. teradataml/data/glml1l2predict_example.json +54 -54
  403. teradataml/data/glmpredict_example.json +54 -54
  404. teradataml/data/gq_t1.csv +21 -21
  405. teradataml/data/hconvolve_complex_right.csv +5 -5
  406. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  407. teradataml/data/histogram_example.json +11 -11
  408. teradataml/data/hmmdecoder_example.json +78 -78
  409. teradataml/data/hmmevaluator_example.json +24 -24
  410. teradataml/data/hmmsupervised_example.json +10 -10
  411. teradataml/data/hmmunsupervised_example.json +7 -7
  412. teradataml/data/house_values.csv +12 -12
  413. teradataml/data/house_values2.csv +13 -13
  414. teradataml/data/housing_cat.csv +7 -7
  415. teradataml/data/housing_data.csv +9 -9
  416. teradataml/data/housing_test.csv +47 -47
  417. teradataml/data/housing_test_binary.csv +47 -47
  418. teradataml/data/housing_train.csv +493 -493
  419. teradataml/data/housing_train_attribute.csv +4 -4
  420. teradataml/data/housing_train_binary.csv +437 -437
  421. teradataml/data/housing_train_parameter.csv +2 -2
  422. teradataml/data/housing_train_response.csv +493 -493
  423. teradataml/data/ibm_stock.csv +370 -370
  424. teradataml/data/ibm_stock1.csv +370 -370
  425. teradataml/data/identitymatch_example.json +21 -21
  426. teradataml/data/idf_table.csv +4 -4
  427. teradataml/data/impressions.csv +101 -101
  428. teradataml/data/inflation.csv +21 -21
  429. teradataml/data/initial.csv +3 -3
  430. teradataml/data/insect_sprays.csv +12 -12
  431. teradataml/data/insurance.csv +1339 -1339
  432. teradataml/data/interpolator_example.json +12 -12
  433. teradataml/data/iris_altinput.csv +481 -481
  434. teradataml/data/iris_attribute_output.csv +8 -8
  435. teradataml/data/iris_attribute_test.csv +121 -121
  436. teradataml/data/iris_attribute_train.csv +481 -481
  437. teradataml/data/iris_category_expect_predict.csv +31 -31
  438. teradataml/data/iris_data.csv +151 -0
  439. teradataml/data/iris_input.csv +151 -151
  440. teradataml/data/iris_response_train.csv +121 -121
  441. teradataml/data/iris_test.csv +31 -31
  442. teradataml/data/iris_train.csv +121 -121
  443. teradataml/data/join_table1.csv +4 -4
  444. teradataml/data/join_table2.csv +4 -4
  445. teradataml/data/jsons/anly_function_name.json +6 -6
  446. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  447. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  448. teradataml/data/jsons/byom/h2opredict.json +194 -194
  449. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  450. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  451. teradataml/data/jsons/paired_functions.json +435 -435
  452. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  453. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  454. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  455. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  456. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  457. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  458. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  459. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  460. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  461. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  462. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  463. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  464. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  465. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  466. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  467. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  468. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  469. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  470. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  471. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  472. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  473. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  474. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  475. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  476. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  477. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  478. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  479. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  480. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  481. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  482. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  483. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  484. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  485. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  486. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  487. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  488. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  489. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  490. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  491. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  492. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  493. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  494. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  495. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  496. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  497. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  498. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  499. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  500. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  501. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  502. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  503. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  504. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  505. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  506. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  507. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  508. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  509. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  510. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  511. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  512. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  513. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  514. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  515. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  516. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  517. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  518. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  519. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  520. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  521. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  522. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  523. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  524. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  525. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  526. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  527. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  528. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  529. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  531. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  532. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  533. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  534. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  535. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  536. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  537. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  539. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  540. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  541. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  542. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  543. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  544. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  545. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  546. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  547. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  548. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  549. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  550. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  551. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  552. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  553. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  554. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  555. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  556. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  557. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  558. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +76 -76
  559. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  560. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  561. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  562. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  563. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  564. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  565. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  566. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  567. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  568. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  569. teradataml/data/jsons/sqle/17.20/TD_FTest.json +186 -186
  570. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  571. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  572. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  573. teradataml/data/jsons/sqle/17.20/TD_GLM.json +431 -431
  574. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +125 -125
  575. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  576. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  577. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +91 -91
  578. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  579. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  580. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  581. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +211 -211
  582. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  583. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  584. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  585. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +101 -101
  586. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  587. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  588. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  589. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  590. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  591. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  592. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  593. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  594. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  595. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  596. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  597. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  598. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  599. teradataml/data/jsons/sqle/17.20/TD_ROC.json +177 -177
  600. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  601. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  602. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  603. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  604. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  605. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  606. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  607. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  608. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +124 -124
  609. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +156 -156
  610. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +70 -70
  611. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  612. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  613. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  614. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  615. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  616. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  617. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  618. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  619. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  620. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  621. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  622. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  623. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  624. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  625. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +312 -312
  626. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +182 -182
  627. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +170 -170
  628. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  629. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  630. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  631. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  632. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  633. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  634. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  635. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  636. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  637. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  638. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  639. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  640. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  641. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  642. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  643. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  644. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  645. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  646. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  647. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  648. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  649. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  650. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  651. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  653. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  654. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  655. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  656. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  657. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  658. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  659. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  660. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  661. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  662. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  663. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  664. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  665. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  666. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  667. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  668. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  669. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  670. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  671. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  672. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  673. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  674. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  675. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  676. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  677. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  678. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  679. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  680. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  681. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  682. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  683. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  684. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  685. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  686. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  687. teradataml/data/kmeans_example.json +17 -17
  688. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  689. teradataml/data/knn_example.json +18 -18
  690. teradataml/data/knnrecommender_example.json +6 -6
  691. teradataml/data/knnrecommenderpredict_example.json +12 -12
  692. teradataml/data/lar_example.json +17 -17
  693. teradataml/data/larpredict_example.json +30 -30
  694. teradataml/data/lc_new_predictors.csv +5 -5
  695. teradataml/data/lc_new_reference.csv +9 -9
  696. teradataml/data/lda_example.json +8 -8
  697. teradataml/data/ldainference_example.json +14 -14
  698. teradataml/data/ldatopicsummary_example.json +8 -8
  699. teradataml/data/levendist_input.csv +13 -13
  700. teradataml/data/levenshteindistance_example.json +10 -10
  701. teradataml/data/linreg_example.json +9 -9
  702. teradataml/data/load_example_data.py +326 -323
  703. teradataml/data/loan_prediction.csv +295 -295
  704. teradataml/data/lungcancer.csv +138 -138
  705. teradataml/data/mappingdata.csv +12 -12
  706. teradataml/data/milk_timeseries.csv +157 -157
  707. teradataml/data/min_max_titanic.csv +4 -4
  708. teradataml/data/minhash_example.json +6 -6
  709. teradataml/data/ml_ratings.csv +7547 -7547
  710. teradataml/data/ml_ratings_10.csv +2445 -2445
  711. teradataml/data/model1_table.csv +5 -5
  712. teradataml/data/model2_table.csv +5 -5
  713. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  714. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  715. teradataml/data/modularity_example.json +12 -12
  716. teradataml/data/movavg_example.json +7 -7
  717. teradataml/data/mtx1.csv +7 -7
  718. teradataml/data/mtx2.csv +13 -13
  719. teradataml/data/multi_model_classification.csv +401 -0
  720. teradataml/data/multi_model_regression.csv +401 -0
  721. teradataml/data/mvdfft8.csv +9 -9
  722. teradataml/data/naivebayes_example.json +9 -9
  723. teradataml/data/naivebayespredict_example.json +19 -19
  724. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  725. teradataml/data/naivebayestextclassifier_example.json +8 -8
  726. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  727. teradataml/data/name_Find_configure.csv +10 -10
  728. teradataml/data/namedentityfinder_example.json +14 -14
  729. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  730. teradataml/data/namedentityfindertrainer_example.json +6 -6
  731. teradataml/data/nb_iris_input_test.csv +31 -31
  732. teradataml/data/nb_iris_input_train.csv +121 -121
  733. teradataml/data/nbp_iris_model.csv +13 -13
  734. teradataml/data/ner_extractor_text.csv +2 -2
  735. teradataml/data/ner_sports_test2.csv +29 -29
  736. teradataml/data/ner_sports_train.csv +501 -501
  737. teradataml/data/nerevaluator_example.json +5 -5
  738. teradataml/data/nerextractor_example.json +18 -18
  739. teradataml/data/nermem_sports_test.csv +17 -17
  740. teradataml/data/nermem_sports_train.csv +50 -50
  741. teradataml/data/nertrainer_example.json +6 -6
  742. teradataml/data/ngrams_example.json +6 -6
  743. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  744. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  745. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  746. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  747. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  748. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  749. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  750. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  751. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  752. teradataml/data/npath_example.json +23 -23
  753. teradataml/data/ntree_example.json +14 -14
  754. teradataml/data/numeric_strings.csv +4 -4
  755. teradataml/data/numerics.csv +4 -4
  756. teradataml/data/ocean_buoy.csv +17 -17
  757. teradataml/data/ocean_buoy2.csv +17 -17
  758. teradataml/data/ocean_buoys.csv +27 -27
  759. teradataml/data/ocean_buoys2.csv +10 -10
  760. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  761. teradataml/data/ocean_buoys_seq.csv +29 -29
  762. teradataml/data/openml_example.json +63 -0
  763. teradataml/data/optional_event_table.csv +4 -4
  764. teradataml/data/orders1.csv +11 -11
  765. teradataml/data/orders1_12.csv +12 -12
  766. teradataml/data/orders_ex.csv +4 -4
  767. teradataml/data/pack_example.json +8 -8
  768. teradataml/data/package_tracking.csv +19 -19
  769. teradataml/data/package_tracking_pti.csv +18 -18
  770. teradataml/data/pagerank_example.json +13 -13
  771. teradataml/data/paragraphs_input.csv +6 -6
  772. teradataml/data/pathanalyzer_example.json +7 -7
  773. teradataml/data/pathgenerator_example.json +7 -7
  774. teradataml/data/phrases.csv +7 -7
  775. teradataml/data/pivot_example.json +8 -8
  776. teradataml/data/pivot_input.csv +22 -22
  777. teradataml/data/playerRating.csv +31 -31
  778. teradataml/data/postagger_example.json +6 -6
  779. teradataml/data/posttagger_output.csv +44 -44
  780. teradataml/data/production_data.csv +16 -16
  781. teradataml/data/production_data2.csv +7 -7
  782. teradataml/data/randomsample_example.json +31 -31
  783. teradataml/data/randomwalksample_example.json +8 -8
  784. teradataml/data/rank_table.csv +6 -6
  785. teradataml/data/ref_mobile_data.csv +4 -4
  786. teradataml/data/ref_mobile_data_dense.csv +2 -2
  787. teradataml/data/ref_url.csv +17 -17
  788. teradataml/data/restaurant_reviews.csv +7 -7
  789. teradataml/data/river_data.csv +145 -145
  790. teradataml/data/roc_example.json +7 -7
  791. teradataml/data/roc_input.csv +101 -101
  792. teradataml/data/rule_inputs.csv +6 -6
  793. teradataml/data/rule_table.csv +2 -2
  794. teradataml/data/sales.csv +7 -7
  795. teradataml/data/sales_transaction.csv +501 -501
  796. teradataml/data/salesdata.csv +342 -342
  797. teradataml/data/sample_cities.csv +2 -2
  798. teradataml/data/sample_shapes.csv +10 -10
  799. teradataml/data/sample_streets.csv +2 -2
  800. teradataml/data/sampling_example.json +15 -15
  801. teradataml/data/sax_example.json +8 -8
  802. teradataml/data/scale_example.json +23 -23
  803. teradataml/data/scale_housing.csv +11 -11
  804. teradataml/data/scale_housing_test.csv +6 -6
  805. teradataml/data/scale_stat.csv +11 -11
  806. teradataml/data/scalebypartition_example.json +13 -13
  807. teradataml/data/scalemap_example.json +13 -13
  808. teradataml/data/scalesummary_example.json +12 -12
  809. teradataml/data/score_category.csv +101 -101
  810. teradataml/data/score_summary.csv +4 -4
  811. teradataml/data/script_example.json +9 -9
  812. teradataml/data/scripts/deploy_script.py +65 -0
  813. teradataml/data/scripts/mapper.R +20 -0
  814. teradataml/data/scripts/mapper.py +15 -15
  815. teradataml/data/scripts/mapper_replace.py +15 -15
  816. teradataml/data/scripts/sklearn/__init__.py +0 -0
  817. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  818. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  819. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  820. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  821. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  822. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  823. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  824. teradataml/data/seeds.csv +10 -10
  825. teradataml/data/sentenceextractor_example.json +6 -6
  826. teradataml/data/sentiment_extract_input.csv +11 -11
  827. teradataml/data/sentiment_train.csv +16 -16
  828. teradataml/data/sentiment_word.csv +20 -20
  829. teradataml/data/sentiment_word_input.csv +19 -19
  830. teradataml/data/sentimentextractor_example.json +24 -24
  831. teradataml/data/sentimenttrainer_example.json +8 -8
  832. teradataml/data/sequence_table.csv +10 -10
  833. teradataml/data/seriessplitter_example.json +7 -7
  834. teradataml/data/sessionize_example.json +17 -17
  835. teradataml/data/sessionize_table.csv +116 -116
  836. teradataml/data/setop_test1.csv +24 -24
  837. teradataml/data/setop_test2.csv +22 -22
  838. teradataml/data/soc_nw_edges.csv +10 -10
  839. teradataml/data/soc_nw_vertices.csv +7 -7
  840. teradataml/data/souvenir_timeseries.csv +167 -167
  841. teradataml/data/sparse_iris_attribute.csv +5 -5
  842. teradataml/data/sparse_iris_test.csv +121 -121
  843. teradataml/data/sparse_iris_train.csv +601 -601
  844. teradataml/data/star1.csv +6 -6
  845. teradataml/data/state_transition.csv +5 -5
  846. teradataml/data/stock_data.csv +53 -53
  847. teradataml/data/stock_movement.csv +11 -11
  848. teradataml/data/stock_vol.csv +76 -76
  849. teradataml/data/stop_words.csv +8 -8
  850. teradataml/data/store_sales.csv +37 -37
  851. teradataml/data/stringsimilarity_example.json +7 -7
  852. teradataml/data/strsimilarity_input.csv +13 -13
  853. teradataml/data/students.csv +101 -101
  854. teradataml/data/svm_iris_input_test.csv +121 -121
  855. teradataml/data/svm_iris_input_train.csv +481 -481
  856. teradataml/data/svm_iris_model.csv +7 -7
  857. teradataml/data/svmdense_example.json +9 -9
  858. teradataml/data/svmdensepredict_example.json +18 -18
  859. teradataml/data/svmsparse_example.json +7 -7
  860. teradataml/data/svmsparsepredict_example.json +13 -13
  861. teradataml/data/svmsparsesummary_example.json +7 -7
  862. teradataml/data/target_mobile_data.csv +13 -13
  863. teradataml/data/target_mobile_data_dense.csv +5 -5
  864. teradataml/data/templatedata.csv +1201 -1201
  865. teradataml/data/templates/open_source_ml.json +9 -0
  866. teradataml/data/teradataml_example.json +73 -1
  867. teradataml/data/test_classification.csv +101 -0
  868. teradataml/data/test_loan_prediction.csv +53 -53
  869. teradataml/data/test_pacf_12.csv +37 -37
  870. teradataml/data/test_prediction.csv +101 -0
  871. teradataml/data/test_regression.csv +101 -0
  872. teradataml/data/test_river2.csv +109 -109
  873. teradataml/data/text_inputs.csv +6 -6
  874. teradataml/data/textchunker_example.json +7 -7
  875. teradataml/data/textclassifier_example.json +6 -6
  876. teradataml/data/textclassifier_input.csv +7 -7
  877. teradataml/data/textclassifiertrainer_example.json +6 -6
  878. teradataml/data/textmorph_example.json +5 -5
  879. teradataml/data/textparser_example.json +15 -15
  880. teradataml/data/texttagger_example.json +11 -11
  881. teradataml/data/texttokenizer_example.json +6 -6
  882. teradataml/data/texttrainer_input.csv +11 -11
  883. teradataml/data/tf_example.json +6 -6
  884. teradataml/data/tfidf_example.json +13 -13
  885. teradataml/data/tfidf_input1.csv +201 -201
  886. teradataml/data/tfidf_train.csv +6 -6
  887. teradataml/data/time_table1.csv +535 -535
  888. teradataml/data/time_table2.csv +14 -14
  889. teradataml/data/timeseriesdata.csv +1601 -1601
  890. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  891. teradataml/data/titanic.csv +892 -892
  892. teradataml/data/token_table.csv +696 -696
  893. teradataml/data/train_multiclass.csv +101 -0
  894. teradataml/data/train_regression.csv +101 -0
  895. teradataml/data/train_regression_multiple_labels.csv +101 -0
  896. teradataml/data/train_tracking.csv +27 -27
  897. teradataml/data/transformation_table.csv +5 -5
  898. teradataml/data/transformation_table_new.csv +1 -1
  899. teradataml/data/tv_spots.csv +16 -16
  900. teradataml/data/twod_climate_data.csv +117 -117
  901. teradataml/data/uaf_example.json +475 -475
  902. teradataml/data/univariatestatistics_example.json +8 -8
  903. teradataml/data/unpack_example.json +9 -9
  904. teradataml/data/unpivot_example.json +9 -9
  905. teradataml/data/unpivot_input.csv +8 -8
  906. teradataml/data/us_air_pass.csv +36 -36
  907. teradataml/data/us_population.csv +624 -624
  908. teradataml/data/us_states_shapes.csv +52 -52
  909. teradataml/data/varmax_example.json +17 -17
  910. teradataml/data/vectordistance_example.json +25 -25
  911. teradataml/data/ville_climatedata.csv +121 -121
  912. teradataml/data/ville_tempdata.csv +12 -12
  913. teradataml/data/ville_tempdata1.csv +12 -12
  914. teradataml/data/ville_temperature.csv +11 -11
  915. teradataml/data/waveletTable.csv +1605 -1605
  916. teradataml/data/waveletTable2.csv +1605 -1605
  917. teradataml/data/weightedmovavg_example.json +8 -8
  918. teradataml/data/wft_testing.csv +5 -5
  919. teradataml/data/wine_data.csv +1600 -0
  920. teradataml/data/word_embed_input_table1.csv +5 -5
  921. teradataml/data/word_embed_input_table2.csv +4 -4
  922. teradataml/data/word_embed_model.csv +22 -22
  923. teradataml/data/words_input.csv +13 -13
  924. teradataml/data/xconvolve_complex_left.csv +6 -6
  925. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  926. teradataml/data/xgboost_example.json +35 -35
  927. teradataml/data/xgboostpredict_example.json +31 -31
  928. teradataml/dataframe/copy_to.py +1764 -1698
  929. teradataml/dataframe/data_transfer.py +2753 -2745
  930. teradataml/dataframe/dataframe.py +17545 -16946
  931. teradataml/dataframe/dataframe_utils.py +1837 -1740
  932. teradataml/dataframe/fastload.py +611 -603
  933. teradataml/dataframe/indexer.py +424 -424
  934. teradataml/dataframe/setop.py +1179 -1166
  935. teradataml/dataframe/sql.py +10090 -6432
  936. teradataml/dataframe/sql_function_parameters.py +439 -388
  937. teradataml/dataframe/sql_functions.py +652 -652
  938. teradataml/dataframe/sql_interfaces.py +220 -220
  939. teradataml/dataframe/vantage_function_types.py +674 -630
  940. teradataml/dataframe/window.py +693 -692
  941. teradataml/dbutils/__init__.py +3 -3
  942. teradataml/dbutils/dbutils.py +1167 -1150
  943. teradataml/dbutils/filemgr.py +267 -267
  944. teradataml/gen_ai/__init__.py +2 -2
  945. teradataml/gen_ai/convAI.py +472 -472
  946. teradataml/geospatial/__init__.py +3 -3
  947. teradataml/geospatial/geodataframe.py +1105 -1094
  948. teradataml/geospatial/geodataframecolumn.py +392 -387
  949. teradataml/geospatial/geometry_types.py +925 -925
  950. teradataml/hyperparameter_tuner/__init__.py +1 -1
  951. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  952. teradataml/hyperparameter_tuner/utils.py +281 -187
  953. teradataml/lib/aed_0_1.dll +0 -0
  954. teradataml/lib/libaed_0_1.dylib +0 -0
  955. teradataml/lib/libaed_0_1.so +0 -0
  956. teradataml/libaed_0_1.dylib +0 -0
  957. teradataml/libaed_0_1.so +0 -0
  958. teradataml/opensource/__init__.py +1 -0
  959. teradataml/opensource/sklearn/__init__.py +1 -0
  960. teradataml/opensource/sklearn/_class.py +255 -0
  961. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  962. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  963. teradataml/opensource/sklearn/constants.py +54 -0
  964. teradataml/options/__init__.py +121 -124
  965. teradataml/options/configure.py +337 -336
  966. teradataml/options/display.py +176 -176
  967. teradataml/plot/__init__.py +2 -2
  968. teradataml/plot/axis.py +1388 -1388
  969. teradataml/plot/constants.py +15 -15
  970. teradataml/plot/figure.py +398 -398
  971. teradataml/plot/plot.py +760 -760
  972. teradataml/plot/query_generator.py +83 -83
  973. teradataml/plot/subplot.py +216 -216
  974. teradataml/scriptmgmt/UserEnv.py +3788 -3761
  975. teradataml/scriptmgmt/__init__.py +3 -3
  976. teradataml/scriptmgmt/lls_utils.py +1616 -1604
  977. teradataml/series/series.py +532 -532
  978. teradataml/series/series_utils.py +71 -71
  979. teradataml/table_operators/Apply.py +949 -917
  980. teradataml/table_operators/Script.py +1719 -1982
  981. teradataml/table_operators/TableOperator.py +1207 -1616
  982. teradataml/table_operators/__init__.py +2 -3
  983. teradataml/table_operators/apply_query_generator.py +262 -262
  984. teradataml/table_operators/query_generator.py +507 -507
  985. teradataml/table_operators/table_operator_query_generator.py +460 -460
  986. teradataml/table_operators/table_operator_util.py +631 -639
  987. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  988. teradataml/table_operators/templates/dataframe_map.template +176 -176
  989. teradataml/table_operators/templates/script_executor.template +170 -170
  990. teradataml/utils/dtypes.py +684 -684
  991. teradataml/utils/internal_buffer.py +84 -84
  992. teradataml/utils/print_versions.py +205 -205
  993. teradataml/utils/utils.py +410 -410
  994. teradataml/utils/validators.py +2239 -2115
  995. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +270 -41
  996. teradataml-20.0.0.0.dist-info/RECORD +1038 -0
  997. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +1 -1
  998. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +1 -1
  999. teradataml/analytics/mle/AdaBoost.py +0 -651
  1000. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1001. teradataml/analytics/mle/Antiselect.py +0 -342
  1002. teradataml/analytics/mle/Arima.py +0 -641
  1003. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1004. teradataml/analytics/mle/Attribution.py +0 -1070
  1005. teradataml/analytics/mle/Betweenness.py +0 -658
  1006. teradataml/analytics/mle/Burst.py +0 -711
  1007. teradataml/analytics/mle/CCM.py +0 -600
  1008. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1009. teradataml/analytics/mle/CFilter.py +0 -460
  1010. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1011. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1012. teradataml/analytics/mle/Closeness.py +0 -737
  1013. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1014. teradataml/analytics/mle/Correlation.py +0 -477
  1015. teradataml/analytics/mle/Correlation2.py +0 -573
  1016. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1017. teradataml/analytics/mle/CoxPH.py +0 -556
  1018. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1019. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1020. teradataml/analytics/mle/DTW.py +0 -623
  1021. teradataml/analytics/mle/DWT.py +0 -564
  1022. teradataml/analytics/mle/DWT2D.py +0 -599
  1023. teradataml/analytics/mle/DecisionForest.py +0 -716
  1024. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1025. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1026. teradataml/analytics/mle/DecisionTree.py +0 -830
  1027. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1028. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1029. teradataml/analytics/mle/FMeasure.py +0 -402
  1030. teradataml/analytics/mle/FPGrowth.py +0 -734
  1031. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1032. teradataml/analytics/mle/GLM.py +0 -558
  1033. teradataml/analytics/mle/GLML1L2.py +0 -547
  1034. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1035. teradataml/analytics/mle/GLMPredict.py +0 -529
  1036. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1037. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1038. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1039. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1040. teradataml/analytics/mle/Histogram.py +0 -561
  1041. teradataml/analytics/mle/IDWT.py +0 -476
  1042. teradataml/analytics/mle/IDWT2D.py +0 -493
  1043. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1044. teradataml/analytics/mle/Interpolator.py +0 -918
  1045. teradataml/analytics/mle/KMeans.py +0 -485
  1046. teradataml/analytics/mle/KNN.py +0 -627
  1047. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1048. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1049. teradataml/analytics/mle/LAR.py +0 -439
  1050. teradataml/analytics/mle/LARPredict.py +0 -478
  1051. teradataml/analytics/mle/LDA.py +0 -548
  1052. teradataml/analytics/mle/LDAInference.py +0 -492
  1053. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1054. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1055. teradataml/analytics/mle/LinReg.py +0 -433
  1056. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1057. teradataml/analytics/mle/MinHash.py +0 -544
  1058. teradataml/analytics/mle/Modularity.py +0 -587
  1059. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1060. teradataml/analytics/mle/NERExtractor.py +0 -595
  1061. teradataml/analytics/mle/NERTrainer.py +0 -458
  1062. teradataml/analytics/mle/NGrams.py +0 -570
  1063. teradataml/analytics/mle/NPath.py +0 -634
  1064. teradataml/analytics/mle/NTree.py +0 -549
  1065. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1066. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1067. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1068. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1069. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1070. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1071. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1072. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1073. teradataml/analytics/mle/POSTagger.py +0 -417
  1074. teradataml/analytics/mle/Pack.py +0 -411
  1075. teradataml/analytics/mle/PageRank.py +0 -535
  1076. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1077. teradataml/analytics/mle/PathGenerator.py +0 -367
  1078. teradataml/analytics/mle/PathStart.py +0 -464
  1079. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1080. teradataml/analytics/mle/Pivot.py +0 -471
  1081. teradataml/analytics/mle/ROC.py +0 -425
  1082. teradataml/analytics/mle/RandomSample.py +0 -637
  1083. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1084. teradataml/analytics/mle/SAX.py +0 -779
  1085. teradataml/analytics/mle/SVMDense.py +0 -677
  1086. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1087. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1088. teradataml/analytics/mle/SVMSparse.py +0 -557
  1089. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1090. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1091. teradataml/analytics/mle/Sampling.py +0 -549
  1092. teradataml/analytics/mle/Scale.py +0 -565
  1093. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1094. teradataml/analytics/mle/ScaleMap.py +0 -378
  1095. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1096. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1097. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1098. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1099. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1100. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1101. teradataml/analytics/mle/Sessionize.py +0 -475
  1102. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1103. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1104. teradataml/analytics/mle/TF.py +0 -389
  1105. teradataml/analytics/mle/TFIDF.py +0 -504
  1106. teradataml/analytics/mle/TextChunker.py +0 -414
  1107. teradataml/analytics/mle/TextClassifier.py +0 -399
  1108. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1109. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1110. teradataml/analytics/mle/TextMorph.py +0 -494
  1111. teradataml/analytics/mle/TextParser.py +0 -623
  1112. teradataml/analytics/mle/TextTagger.py +0 -530
  1113. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1114. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1115. teradataml/analytics/mle/Unpack.py +0 -526
  1116. teradataml/analytics/mle/Unpivot.py +0 -438
  1117. teradataml/analytics/mle/VarMax.py +0 -776
  1118. teradataml/analytics/mle/VectorDistance.py +0 -762
  1119. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1120. teradataml/analytics/mle/XGBoost.py +0 -842
  1121. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1122. teradataml/analytics/mle/__init__.py +0 -123
  1123. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1124. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1125. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1126. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1127. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1128. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1129. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1130. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1131. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1132. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1133. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1134. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1135. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1136. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1137. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1138. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1139. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1140. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1141. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1142. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1143. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1144. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1145. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1146. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1147. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1148. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1149. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1150. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1151. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1152. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1153. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1154. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1155. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1156. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1157. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1158. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1159. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1160. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1161. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1162. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1163. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1164. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1165. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1166. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1167. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1168. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1169. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1170. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1171. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1172. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1173. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1174. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1175. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1176. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1177. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1178. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1179. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1180. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1181. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1182. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1183. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1184. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1185. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1186. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1187. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1188. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1189. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1190. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1191. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1192. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1193. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1194. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1195. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1196. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1197. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1198. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1199. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1200. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1201. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1202. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1203. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1204. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1205. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1206. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1207. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1208. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1209. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1210. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1211. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1212. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1213. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1214. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1215. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1216. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1217. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1218. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1219. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1220. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1221. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1222. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1223. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1224. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1225. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1226. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1227. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1228. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1229. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1230. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1231. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1232. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1233. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1234. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1235. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1236. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1237. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1238. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1239. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1240. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1241. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1242. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1243. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1244. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1245. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1246. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1247. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1248. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1249. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1250. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1251. teradataml/analytics/sqle/Antiselect.py +0 -321
  1252. teradataml/analytics/sqle/Attribution.py +0 -603
  1253. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1254. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1255. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1256. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1257. teradataml/analytics/sqle/NPath.py +0 -632
  1258. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1259. teradataml/analytics/sqle/Pack.py +0 -388
  1260. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1261. teradataml/analytics/sqle/Sessionize.py +0 -390
  1262. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1263. teradataml/analytics/sqle/Unpack.py +0 -503
  1264. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1265. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1266. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1267. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1268. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1269. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1270. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1271. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1272. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1273. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1274. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1275. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1276. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1277. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1278. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1279. teradataml/catalog/model_cataloging.py +0 -980
  1280. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1281. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1282. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1283. teradataml/table_operators/sandbox_container_util.py +0 -643
  1284. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1285. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
@@ -1,1166 +1,1179 @@
1
- #!/usr/bin/python
2
- # ##################################################################
3
- #
4
- # Copyright 2019 Teradata. All rights reserved.
5
- # TERADATA CONFIDENTIAL AND TRADE SECRET
6
- #
7
- # Primary Owner: Rohit Khurd (rohit.khurd@teradata.com)
8
- # Secondary Owner: Abhinav Sahu (abhinav.sahu@teradata.com)
9
- #
10
- # This file implements APIs and utility functions for set operations.
11
- # ##################################################################
12
-
13
- import inspect, importlib
14
- from collections import OrderedDict
15
- from teradataml.common.exceptions import TeradataMlException
16
- from teradataml.common.messages import Messages
17
- from teradataml.common.messagecodes import MessageCodes
18
- from teradataml.common.utils import UtilFuncs
19
- from teradataml.dataframe import dataframe
20
- from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
21
- from teradataml.common.aed_utils import AedUtils
22
- from teradataml.utils.validators import _Validators
23
- from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
24
- from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
25
- from teradatasql import OperationalError
26
-
27
- module = importlib.import_module("teradataml")
28
-
29
- def __validate_setop_args(df_list, awu_matrix, setop_type):
30
- """
31
- DESCRIPTION:
32
- Internal function to check for the validity of the input arguments.
33
-
34
- PARAMETERS:
35
- df_list:
36
- Required argument.
37
- Specifies the list of teradataml DataFrames.
38
- Types: list of teradataml DataFrames
39
-
40
- awu_matrix:
41
- Required argument.
42
- Specifies the list of argument-validation entries; for each argument, the
44
- expected types are given as a type or a tuple of types.
44
-
45
- setop_type:
46
- Required argument.
47
- Specifies the type of SET Operation to be performed.
48
- Types: str
49
-
50
- RAISES:
51
- TeradataMlException
52
-
53
- EXAMPLES:
54
- __validate_setop_args(df_list, awu_matrix, setop_type)
55
-
56
- """
57
- # Validate argument types
58
- _Validators._validate_function_arguments(awu_matrix)
59
-
60
- # Validate the number of dfs in df_list
61
- if len(df_list) < 2:
62
- raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_INVALID_DF_COUNT,
63
- setop_type),
64
- MessageCodes.SETOP_INVALID_DF_COUNT)
65
-
66
- # Validate if all items in df_list are DataFrames
67
- for i in range(len(df_list)):
68
- _Validators._validate_function_arguments([['df_list[{0}]'.format(i), df_list[i],
69
- False, (dataframe.DataFrame)]])
70
-
71
- # Validate that all DataFrames have the same number of columns for 'td_intersect', 'td_minus' and 'td_except'
72
- if setop_type in ['td_intersect', 'td_minus', 'td_except']:
73
- it = iter(df_list[i].columns for i in range(len(df_list)))
74
- the_len = len(next(it))
75
- if not all(len(l) == the_len for l in it):
76
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_DF_LENGTH),
77
- MessageCodes.INVALID_DF_LENGTH)
78
-
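The column-count rule enforced above for 'td_intersect', 'td_minus' and 'td_except' is small enough to restate standalone. A minimal sketch, with plain lists standing in for `DataFrame.columns` and a hypothetical helper name:

```python
# Minimal sketch: every frame in the set operation must expose the same
# number of columns. `column_lists` stands in for [df.columns for df in df_list].
def _same_column_count(column_lists):
    it = iter(column_lists)
    first_len = len(next(it))
    return all(len(cols) == first_len for cols in it)

assert _same_column_count([["id", "gpa"], ["id", "masters"]])
assert not _same_column_count([["id", "gpa"], ["id"]])
```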
79
- def __check_concat_compatibility(df_list, join, sort, ignore_index):
80
- """
81
- DESCRIPTION:
82
- Internal function to check if the DataFrames are compatible for concat or not.
83
-
84
- PARAMETERS:
85
- df_list:
86
- Required argument.
87
- Specifies the list of teradataml DataFrames to be concatenated.
88
- Types: list of teradataml DataFrames
89
-
90
- join:
91
- Required argument.
92
- Specifies the type of join to use in concat ('inner' or 'outer').
93
- Types: str
94
-
95
- sort:
96
- Required argument.
97
- Specifies a flag to determine whether the columns should be sorted while being projected.
98
- Types: bool
99
-
100
- ignore_index:
101
- Required argument.
102
- Specifies whether to ignore the index columns in resulting DataFrame or not.
103
- Types: bool
104
-
105
- RETURNS:
106
- A tuple of the following form:
107
- (master_column_dict, is_lazy)
108
-
109
- where master_column_dict is a dictionary with the column names to project as a result as the keys,
110
- and is of the following form:
111
- {
112
- '<col_name_1>' : {
113
- 'col_present' : [True, False],
114
- 'col_type': <type>
115
- },
116
- '<col_name_2>' : {
117
- ...
118
- },
119
- ...
120
- }
121
-
122
- The value of the keys in the dictionary is again a dictionary with the following elements:
123
- 1. 'col_present': A list of booleans, the nth value indicating the column's presence in the nth DF.
124
- Presence specified by True, and absence by False,
125
- 2. 'col_type': The teradatasqlalchemy datatype of the column in the first DF that the column is present in,
126
-
127
- and 'is_lazy' is a boolean which indicates whether the result DataFrame creation should be a lazy operation
128
- or not, based on the column type compatibility.
129
-
130
- RAISES:
131
- None
132
-
133
- EXAMPLES:
134
- columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort, ignore_index)
135
- """
136
- dfs_to_operate_on = df_list
137
-
138
- # Initialize the return objects including a variable deciding whether the execution is lazy or not.
139
- # The execution will be non-lazy if the types of columns are not an exact match.
140
- # TODO: Add a set operation type compatibility matrix for use to make this operation completely lazy
141
- # https://jira.td.teradata.com/jira/browse/ELE-1913
142
-
143
- col_dict = OrderedDict()
144
- is_lazy = True
145
-
146
- # Iterate on all DFs to be applied for set operation.
147
- for df in dfs_to_operate_on:
148
- # Process each column in the DF of the iteration.
149
- for c in df._metaexpr.t.c:
150
- col_name = c.name
151
- # Process the column name if it is not already processed.
152
- # Processing of set operation is column name based so if the DF in the nth iteration had column 'xyz',
153
- # then the column with the same name in any DF in later iterations need not be processed.
154
- if col_name not in col_dict:
155
- # For every column, its entry in the dictionary looks like:
156
- # '<column_name>' : { 'col_present' : [True, False], 'col_type': <type> }
157
- # where :
158
- # '<column_name>' : is the name of the column being processed.
159
- #
160
- # Its value is yet another dictionary with keys:
161
- # 'col_present' : Its value is a list of booleans, the nth value in it indicating the
162
- # columns presence in the nth DF - presence specified by True,
163
- # and absence by False.
164
- # 'col_type' : Its value is the teradatasqlalchemy type of the column in the first DF
165
- # that the column is present in.
166
-
167
- # Generate a list of booleans, each value of it indicating the columns presence in the DF in the
168
- # dfs_to_operate_on list. If ignore_index is True then assign False so that we can ignore when
169
- # forming dict.
170
-
171
- col_present_in_dfs = []
172
- for inner_df in dfs_to_operate_on:
173
- col_present_in_df = None
174
- if ignore_index and inner_df.index and col_name in inner_df._index_label:
175
- col_present_in_df = False
176
- else:
177
- col_present_in_df = df_utils._check_column_exists(col_name, inner_df.columns)
178
- col_present_in_dfs.append(col_present_in_df)
179
-
180
- if join.upper() == 'INNER':
181
- # For inner join, the column has to be present in all DFs.
182
- if all(col_present_in_dfs):
183
- col_dict[col_name] = {}
184
-
185
- # Get the type of the column in all the DFs.
186
- col_types_in_dfs = [inner_df._metaexpr.t.c[col_name].type for inner_df in
187
- dfs_to_operate_on]
188
-
189
- # Populate the 'column_present' list using the col_present_in_dfs.
190
- col_dict[col_name]['col_present'] = col_present_in_dfs
191
- # The type to be used for the column is the one of the first DF it is present in.
192
- col_dict[col_name]['col_type'] = col_types_in_dfs[0]
193
-
194
- # If the type of the column in all DFs is not the same, then the operation is not lazy.
195
- if not all(ctype == col_dict[col_name]['col_type']
196
- for ctype in col_types_in_dfs):
197
- is_lazy = False
198
-
199
- elif join.upper() == 'OUTER':
200
- # If the column is marked as False for all DataFrames
201
- if not any(col_present_in_dfs):
202
- pass
203
- else:
204
- # For outer join, the column need not be present in all DFs.
205
- col_dict[col_name] = {}
206
- # Get the type of the column in all the DFs. None for the DF it is not present in.
207
- col_types_in_dfs = [None if not present else inner_df._metaexpr.t.c[col_name].type
208
- for (inner_df, present) in zip(dfs_to_operate_on, col_present_in_dfs)]
209
-
210
- # Find the type of the column in the first DF it is present in.
211
- non_none_type_to_add = next(ctype for ctype in col_types_in_dfs if ctype is not None)
212
-
213
- # Populate the 'column_present' list using the col_present_in_dfs.
214
- col_dict[col_name]['col_present'] = col_present_in_dfs
215
- # The type to be used for the column is the one of the first DF it is present in.
216
- col_dict[col_name]['col_type'] = non_none_type_to_add
217
-
218
- # If the type of the column in all DFs is not the same, then the operation is not lazy.
219
- if not all(True if ctype is None else ctype == non_none_type_to_add
220
- for ctype in col_types_in_dfs):
221
- is_lazy = False
222
-
223
- # Sort if required
224
- if sort and join.upper() == 'OUTER':
225
- col_dict = OrderedDict(sorted(col_dict.items()))
226
-
227
- # If the result has no columns, i.e. no data
228
- if len(col_dict) < 1:
229
- raise TeradataMlException(Messages.get_message(MessageCodes.DF_WITH_NO_COLUMNS),
230
- MessageCodes.DF_WITH_NO_COLUMNS)
231
-
232
- return col_dict, is_lazy
233
-
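To make the dictionary shape documented above concrete, here is a hand-built sketch (illustrative values, not the output of running the function) for an OUTER concat of df1 with columns (id, gpa) and df2 with column (id), using teradatasqlalchemy type objects:

```python
from collections import OrderedDict
from teradatasqlalchemy import INTEGER, FLOAT

# 'gpa' exists only in the first frame, so its 'col_present' list is
# [True, False] and its type is taken from that first frame.
master_columns_dict = OrderedDict([
    ("id",  {"col_present": [True, True],  "col_type": INTEGER()}),
    ("gpa", {"col_present": [True, False], "col_type": FLOAT()}),
])
is_lazy = True  # every column's type matched exactly wherever it was present
```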
234
- def __check_setop_if_lazy(df_list):
235
- """
236
- DESCRIPTION:
237
- Internal function to check if the teradataml DataFrames column types are compatible for
238
- any set operation or not.
239
-
240
- PARAMETERS:
241
- df_list:
242
- Required argument.
243
- Specifies the list of teradataml DataFrames.
244
- Types: list of teradataml DataFrames
245
-
246
- RETURNS:
247
- A boolean 'is_lazy' which indicates whether the result DataFrame creation should be a
248
- lazy operation or not.
249
-
250
- RAISES:
251
- None
252
-
253
- EXAMPLES:
254
- is_lazy = __check_setop_if_lazy(df_list)
255
- """
256
-
257
- # Initialize the return variable deciding whether the execution is lazy or not.
258
- # The execution will be non-lazy if the types of columns are not an exact match.
259
- is_lazy = True
260
-
261
- # Take first df's metadata for columns and then iterate for column_names on first DF which
262
- # has to be projected for any set operation.
263
- for i, col in enumerate(df_list[0]._metaexpr.t.c):
264
- for k in range(1, len(df_list)):
265
- next_df_cols = df_list[k].columns
266
- next_df_type = df_list[k]._metaexpr.t.c[next_df_cols[i]].type
267
- if type(next_df_type) is not type(col.type):
268
- is_lazy = False
269
-
270
- return is_lazy
271
-
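Note that the check above is positional: the ith column of every later frame is compared against the ith column of the first frame, and by type class rather than by value equality. The same idea as a standalone sketch (hypothetical helper; any sqlalchemy-style type objects work):

```python
def _types_match_positionally(type_lists):
    # type_lists[k][i] is the type object of column i in frame k.
    first = type_lists[0]
    return all(
        type(frame_types[i]) is type(first[i])
        for frame_types in type_lists[1:]
        for i in range(len(first))
    )

# Toy usage with built-in values standing in for teradatasqlalchemy types:
assert _types_match_positionally([[1, "a"], [2, "b"]])
assert not _types_match_positionally([[1, "a"], [2, 3]])
```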
272
- def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name = None):
273
- """
274
- DESCRIPTION:
275
- Internal function to process the columns as per given nodeid and setop_type, and
276
- return the result DataFrame.
277
-
278
- PARAMETERS:
279
- meta_data:
280
- Required argument.
281
- Specifies either a metaexpr for the first DataFrame or a dictionary with the
282
- column names as dictionary keys to be projected as a result. If a dict, the value
283
- of the keys in the dictionary is again a dictionary with the elements mentioning
284
- column presence and its type.
285
- Types: _MetaExpression, OrderedDict
286
-
287
- is_lazy:
288
- Required argument.
289
- Specifies a boolean based on the column type compatibility, indicating
290
- whether set operation is lazy or not.
291
- Types: bool
292
-
293
- setop_type:
294
- Required argument.
295
- Specifies the type of SET Operation to be performed.
296
- Types: str
297
-
298
- nodeid:
299
- Required argument.
300
- Specifies the node id for the teradataml DataFrame.
301
-
302
- index_label:
303
- Required argument.
304
- Specifies list of index columns for teradataml DataFrame.
305
- Types: list
306
-
307
- index_to_use:
308
- Required argument.
309
- Specifies column(s) which can also be part of final index_label list.
310
- Types: list
311
-
312
- class_name:
313
- Optional argument.
314
- Specifies the name of the class for the first dataframe for deciding the
315
- return type of the output dataframe.
316
- Default value: None
317
- Types: str
318
-
319
- RETURNS:
320
- teradataml DataFrame
321
-
322
- RAISES:
323
- TeradataMlException
324
-
325
- EXAMPLES:
326
- >>> __process_operation(meta_data, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)
327
-
328
- """
329
-
330
- # Separate processing for concat and other set operators as concat has OrderedDict as metadata.
331
- if setop_type == 'concat':
332
- class_name = "DataFrame"
333
- column_info = list((col_name, meta_data[col_name]['col_type']) for col_name in meta_data)
334
- for col in column_info:
335
- if isinstance(col[1], (GEOMETRY, MBR, MBB)):
336
- class_name = "GeoDataFrame"
337
- break
338
-
339
- # Constructing new Metadata (_metaexpr) without DB; using dummy nodeid and get new metaexpr for nodeid.
340
- meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info) if is_lazy else meta_data
341
-
342
- if is_lazy:
343
- return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)
344
- else:
345
- try:
346
- # Execute node and get table_name to build DataFrame on.
347
- table_name = df_utils._execute_node_return_db_object_name(nodeid)
348
- return getattr(module, class_name).from_table(table_name, index_label=index_to_use)
349
- except TeradataMlException as err:
350
- # We should be here only because DataFrame creation failed
351
- # due to incompatible column types; a TeradataMlException is raised when DF creation fails.
352
- raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_COL_TYPE_MISMATCH, setop_type),
353
- MessageCodes.SETOP_COL_TYPE_MISMATCH) from err
354
- except OperationalError:
355
- raise
356
-
357
-
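Stripped of error handling, the function above reduces to a two-way choice: a lazy result wraps the unexecuted set-operation node, while a type-incompatible result is executed immediately so the database can reconcile the column types. A generic sketch of that split (callables stand in for the node-execution and DataFrame-construction steps; none of these names are from the real code):

```python
def build_result(is_lazy, wrap_node, execute_node, from_table):
    if is_lazy:
        return wrap_node()                 # defer: no SQL runs yet
    return from_table(execute_node())      # eager: run SQL, rebuild from the table

# Toy usage:
print(build_result(True,  lambda: "lazy-df", lambda: "t1", lambda t: f"df({t})"))
print(build_result(False, lambda: "lazy-df", lambda: "t1", lambda t: f"df({t})"))
```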
358
- def concat(df_list, join='OUTER', allow_duplicates=True, sort=False, ignore_index=False):
359
- """
360
- DESCRIPTION:
361
- Concatenates a list of teradataml DataFrames, GeoDataFrames, or both along the index axis.
362
-
363
- PARAMETERS:
364
- df_list:
365
- Required argument.
366
- Specifies a list of teradataml DataFrames, GeoDataFrames, or both on which the
367
- concatenation is to be performed.
368
- Types: list of teradataml DataFrames and/or GeoDataFrames
369
-
370
- join:
371
- Optional argument.
372
- Specifies how to handle indexes on columns axis.
373
- Supported values are:
374
- • 'OUTER': It instructs the function to project all columns from all the DataFrames.
375
- Columns not present in any DataFrame will have a SQL NULL value.
376
- • 'INNER': It instructs the function to project only the columns common to all DataFrames.
377
- Default value: 'OUTER'
378
- Permitted values: 'INNER', 'OUTER'
379
- Types: str
380
-
381
- allow_duplicates:
382
- Optional argument.
383
- Specifies if the result of concatenation can have duplicate rows.
384
- Default value: True
385
- Types: bool
386
-
387
- sort:
388
- Optional argument.
389
- Specifies a flag to sort the columns axis if it is not already aligned when
390
- the join argument is set to 'outer'.
391
- Default value: False
392
- Types: bool
393
-
394
- ignore_index:
395
- Optional argument.
396
- Specifies whether to ignore the index columns in resulting DataFrame or not.
397
- If True, then index columns will be ignored in the concat operation.
398
- Default value: False
399
- Types: bool
400
-
401
- RETURNS:
402
- teradataml DataFrame if the result does not contain any geometry data; otherwise, teradataml GeoDataFrame.
403
-
404
- RAISES:
405
- TeradataMlException
406
-
407
- EXAMPLES:
408
- >>> from teradataml import load_example_data
409
- >>> load_example_data("dataframe", "admissions_train")
410
- >>> load_example_data("geodataframe", ["sample_shapes"])
411
- >>> from teradataml.dataframe import concat
412
- >>>
413
- >>> # Default options
414
- >>> df = DataFrame('admissions_train')
415
- >>> df1 = df[df.gpa == 4].select(['id', 'stats', 'masters', 'gpa'])
416
- >>> df1
417
- stats masters gpa
418
- id
419
- 13 Advanced no 4.0
420
- 29 Novice yes 4.0
421
- 15 Advanced yes 4.0
422
- >>> df2 = df[df.gpa < 2].select(['id', 'stats', 'programming', 'admitted'])
423
- >>> df2
424
- stats programming admitted
425
- id
426
- 24 Advanced Novice 1
427
- 19 Advanced Advanced 0
428
- >>> cdf = concat([df1, df2])
429
- >>> cdf
430
- stats masters gpa programming admitted
431
- id
432
- 19 Advanced None NaN Advanced 0
433
- 24 Advanced None NaN Novice 1
434
- 13 Advanced no 4.0 None None
435
- 29 Novice yes 4.0 None None
436
- 15 Advanced yes 4.0 None None
437
- >>>
438
- >>> # concat more than two DataFrames
439
- >>> df3 = df[df.gpa == 3].select(['id', 'stats', 'programming', 'gpa'])
440
- >>> df3
441
- stats programming gpa
442
- id
443
- 36 Advanced Novice 3.0
444
- >>> cdf = concat([df1, df2, df3])
445
- >>> cdf
446
- stats masters gpa programming admitted
447
- id
448
- 15 Advanced yes 4.0 None NaN
449
- 19 Advanced None NaN Advanced 0.0
450
- 36 Advanced None 3.0 Novice NaN
451
- 29 Novice yes 4.0 None NaN
452
- 13 Advanced no 4.0 None NaN
453
- 24 Advanced None NaN Novice 1.0
454
-
455
- >>> # join = 'inner'
456
- >>> cdf = concat([df1, df2], join='inner')
457
- >>> cdf
458
- stats
459
- id
460
- 19 Advanced
461
- 24 Advanced
462
- 13 Advanced
463
- 29 Novice
464
- 15 Advanced
465
- >>>
466
- >>> # allow_duplicates = True (default)
467
- >>> cdf = concat([df1, df2])
468
- >>> cdf
469
- stats masters gpa programming admitted
470
- id
471
- 19 Advanced None NaN Advanced 0
472
- 24 Advanced None NaN Novice 1
473
- 13 Advanced no 4.0 None None
474
- 29 Novice yes 4.0 None None
475
- 15 Advanced yes 4.0 None None
476
- >>> cdf = concat([cdf, df2])
477
- >>> cdf
478
- stats masters gpa programming admitted
479
- id
480
- 19 Advanced None NaN Advanced 0
481
- 13 Advanced no 4.0 None None
482
- 24 Advanced None NaN Novice 1
483
- 24 Advanced None NaN Novice 1
484
- 19 Advanced None NaN Advanced 0
485
- 29 Novice yes 4.0 None None
486
- 15 Advanced yes 4.0 None None
487
- >>>
488
- >>> # allow_duplicates = False
489
- >>> cdf = concat([cdf, df2], allow_duplicates=False)
490
- >>> cdf
491
- stats masters gpa programming admitted
492
- id
493
- 19 Advanced None NaN Advanced 0
494
- 29 Novice yes 4.0 None None
495
- 24 Advanced None NaN Novice 1
496
- 15 Advanced yes 4.0 None None
497
- 13 Advanced no 4.0 None None
498
- >>>
499
- >>> # sort = True
500
- >>> cdf = concat([df1, df2], sort=True)
501
- >>> cdf
502
- admitted gpa masters programming stats
503
- id
504
- 19 0 NaN None Advanced Advanced
505
- 24 1 NaN None Novice Advanced
506
- 13 None 4.0 no None Advanced
507
- 29 None 4.0 yes None Novice
508
- 15 None 4.0 yes None Advanced
509
- >>>
510
- >>> # ignore_index = True
511
- >>> cdf = concat([df1, df2], ignore_index=True)
512
- >>> cdf
513
- stats masters gpa programming admitted
514
- 0 Advanced yes 4.0 None NaN
515
- 1 Advanced None NaN Advanced 0.0
516
- 2 Novice yes 4.0 None NaN
517
- 3 Advanced None NaN Novice 1.0
518
- 4 Advanced no 4.0 None NaN
519
-
520
- # Perform concatenation of two GeoDataFrames
521
- >>> geo_dataframe = GeoDataFrame('sample_shapes')
522
- >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
523
- >>> geo_dataframe1
524
-
525
- skey linestrings
526
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
527
-
528
- >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','polygons'])
529
- >>> geo_dataframe2
530
-
531
- skey polygons
532
- 1009 MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
533
- 1005 POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
534
- 1004 POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
535
- 1002 POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
536
- 1001 POLYGON ((0 0,0 20,20 20,20 0,0 0))
537
- 1003 POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
538
- 1007 MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
539
- 1006 POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
540
- 1008 MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))
541
-
542
-
543
- >>> concat([geo_dataframe1,geo_dataframe2])
544
-
545
- skey linestrings polygons
546
- 1009 None MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
547
- 1005 None POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
548
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80) None
549
- 1004 None POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
550
- 1003 None POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
551
- 1001 None POLYGON ((0 0,0 20,20 20,20 0,0 0))
552
- 1002 None POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
553
- 1007 None MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
554
- 1006 None POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
555
- 1008 None MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))
556
-
557
- # Perform concatenation of a DataFrame and GeoDataFrame which returns a GeoDataFrame.
558
- >>> normal_df = df.select(['id','stats'])
559
- >>> normal_df
560
- stats
561
- id
562
- 34 Advanced
563
- 32 Advanced
564
- 11 Advanced
565
- 40 Novice
566
- 38 Advanced
567
- 36 Advanced
568
- 7 Novice
569
- 26 Advanced
570
- 19 Advanced
571
- 13 Advanced
572
- >>> geo_df = geo_dataframe[geo_dataframe.skey < 1010].select(['skey', 'polygons'])
573
- >>> geo_df
574
-
575
- skey polygons
576
- 1003 POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
577
- 1008 MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))
578
- 1006 POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
579
- 1009 MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
580
- 1005 POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
581
- 1007 MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
582
- 1001 POLYGON ((0 0,0 20,20 20,20 0,0 0))
583
- 1002 POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
584
- 1004 POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
585
-
586
- >>> idf = concat([normal_df, geo_df])
587
- >>> idf
588
- stats skey polygons
589
- id
590
- 38 Advanced None None
591
- 7 Novice None None
592
- 26 Advanced None None
593
- 17 Advanced None None
594
- 34 Advanced None None
595
- 13 Advanced None None
596
- 32 Advanced None None
597
- 11 Advanced None None
598
- 15 Advanced None None
599
- 36 Advanced None None
600
- >>>
601
- """
602
- concat_join_permitted_values = ['INNER', 'OUTER']
603
-
604
- # The matrix below is a list of lists, where each row contains the following elements:
606
- # Let's take the following example, just to get an idea:
606
- # [element1, element2, element3, element4, element5, element6]
607
- # e.g.
608
- # ["join", join, True, (str), True, concat_join_permitted_values]
609
-
610
- # 1. element1 --> Argument Name, a string. ["join" in above example.]
611
- # 2. element2 --> Argument itself. [join]
612
- # 3. element3 --> Specifies a flag that mentions argument is optional or not.
613
- # False, means required and True means optional.
614
- # 4. element4 --> Tuple of accepted types. (str) in above example.
615
- # 5. element5 --> True, means validate for empty value. An error will be raised if an empty value is passed.
616
- # If not specified, means same as specifying False.
617
- # 6. element6 --> A list of permitted values, an argument can accept.
618
- # If not specified, it is as good as passing None. If a list is passed, validation will be
619
- # performed for permitted values.
620
- awu_matrix = []
621
- awu_matrix.append(["df_list", df_list, False, (list)])
622
- awu_matrix.append(["join", join, True, (str), True, concat_join_permitted_values])
623
- awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
624
- awu_matrix.append(["sort", sort, False, (bool)])
625
- awu_matrix.append(["ignore_index", ignore_index, False, (bool)])
626
- setop_type='concat'
627
-
628
- # Validate Set operator arguments
629
- __validate_setop_args(df_list, awu_matrix, setop_type)
630
-
631
- # Generate the columns and their type to output, and check if the evaluation has to be lazy
632
- master_columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort, ignore_index)
633
-
634
- try:
635
- aed_utils = AedUtils()
636
-
637
- # Set the index_label to the columns in the first df's index_label if it is being projected;
638
- # otherwise fall back to the second df's index_label, and so on through the last df.
639
- # Finally, leave it as None if none of the dfs have an index_label.
640
- index_label = None
641
- index_to_use = None
642
- for df in df_list:
643
- if df._index_label is not None and any(ind_col in master_columns_dict for ind_col in df._index_label):
644
- index_label = []
645
- index_to_use = df._index_label
646
- break
647
-
648
- if index_to_use is not None:
649
- for ind_col in index_to_use:
650
- if ind_col in master_columns_dict:
651
- index_label.append(ind_col)
652
-
653
- # Remove index columns if 'ignore_index' is set to True from master_columns_dict
654
- if ignore_index and index_to_use is not None:
655
- index_label = None
656
- index_to_use = None
657
-
658
- col_list = []
659
- for i in range(len(df_list)):
660
- col_list.append([])
661
-
662
- # Now create the list of columns for each DataFrame to concatenate
663
- type_compiler = td_type_compiler(td_dialect)
664
- for col_name, value in master_columns_dict.items():
665
- for i in range(len(col_list)):
666
- if not value['col_present'][i]:
667
- col_list[i].append('CAST(NULL as {}) as {}'.format(type_compiler.process(value['col_type']),
668
- UtilFuncs._teradata_quote_arg(col_name, "\"",
669
- False)))
670
- else:
671
- col_name = UtilFuncs._process_for_teradata_keyword(col_name)
672
- col_list[i].append(col_name)
673
-
674
- input_table_columns = []
675
- for i in range(len(col_list)):
676
- input_table_columns.append(','.join(col_list[i]))
677
-
678
- concat_nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
679
- 'unionall' if allow_duplicates else 'union',
680
- input_table_columns)
681
- return __process_operation(master_columns_dict, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)
682
-
683
- except TeradataMlException:
684
- raise
685
- except Exception as err:
686
- raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
687
- MessageCodes.SETOP_FAILED) from err
688
-
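The CAST(NULL ...) projection built above is what lets an OUTER concat union frames with different column sets: every SELECT arm projects the full output column list, and a column missing from a frame is supplied as a typed NULL. A minimal standalone sketch (hypothetical helper; plain strings stand in for the compiled teradatasqlalchemy types):

```python
def _build_projection(all_columns, present, type_ddl):
    # One SELECT list; columns absent from this frame become typed NULL casts.
    items = []
    for col in all_columns:
        if present[col]:
            items.append('"{}"'.format(col))
        else:
            items.append('CAST(NULL AS {}) AS "{}"'.format(type_ddl[col], col))
    return ", ".join(items)

# df2 lacks 'gpa', so its arm of the UNION ALL gets a typed NULL:
print(_build_projection(["id", "gpa"], {"id": True, "gpa": False}, {"gpa": "FLOAT"}))
# -> "id", CAST(NULL AS FLOAT) AS "gpa"
```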
689
- def td_intersect(df_list, allow_duplicates=True):
690
- """
691
- DESCRIPTION:
692
- This function intersects a list of teradataml DataFrames or GeoDataFrames along the index axis and
693
- returns a DataFrame with rows common to all input DataFrames.
694
- Note:
695
- This function should be applied to data frames of the same type: either all teradataml DataFrames,
696
- or all GeoDataFrames.
697
-
698
- PARAMETERS:
699
- df_list:
700
- Required argument.
701
- Specifies the list of teradataml DataFrames or GeoDataFrames on which the intersection is to be performed.
702
- Types: list of teradataml DataFrames or GeoDataFrames
703
-
704
- allow_duplicates:
705
- Optional argument.
706
- Specifies if the result of intersection can have duplicate rows.
707
- Default value: True
708
- Types: bool
709
-
710
- RETURNS:
711
- teradataml DataFrame when intersect is performed on teradataml DataFrames.
712
- teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.
713
-
714
- RAISES:
715
- TeradataMlException, TypeError
716
-
717
- EXAMPLES:
718
- >>> from teradataml import load_example_data
719
- >>> load_example_data("dataframe", "setop_test1")
720
- >>> load_example_data("dataframe", "setop_test2")
721
- >>> load_example_data("geodataframe", ["sample_shapes"])
722
- >>> from teradataml.dataframe.setop import td_intersect
723
- >>>
724
- >>> df1 = DataFrame('setop_test1')
725
- >>> df1
726
- masters gpa stats programming admitted
727
- id
728
- 62 no 3.70 Advanced Advanced 1
729
- 53 yes 3.50 Beginner Novice 1
730
- 69 no 3.96 Advanced Advanced 1
731
- 61 yes 4.00 Advanced Advanced 1
732
- 58 no 3.13 Advanced Advanced 1
733
- 51 yes 3.76 Beginner Beginner 0
734
- 68 no 1.87 Advanced Novice 1
735
- 66 no 3.87 Novice Beginner 1
736
- 60 no 4.00 Advanced Novice 1
737
- 59 no 3.65 Novice Novice 1
738
- >>> df2 = DataFrame('setop_test2')
739
- >>> df2
740
- masters gpa stats programming admitted
741
- id
742
- 12 no 3.65 Novice Novice 1
743
- 15 yes 4.00 Advanced Advanced 1
744
- 14 yes 3.45 Advanced Advanced 0
745
- 20 yes 3.90 Advanced Advanced 1
746
- 18 yes 3.81 Advanced Advanced 1
747
- 17 no 3.83 Advanced Advanced 1
748
- 13 no 4.00 Advanced Novice 1
749
- 11 no 3.13 Advanced Advanced 1
750
- 60 no 4.00 Advanced Novice 1
751
- 19 yes 1.98 Advanced Advanced 0
752
- >>> idf = td_intersect([df1, df2])
753
- >>> idf
754
- masters gpa stats programming admitted
755
- id
756
- 64 yes 3.81 Advanced Advanced 1
757
- 60 no 4.00 Advanced Novice 1
758
- 58 no 3.13 Advanced Advanced 1
759
- 68 no 1.87 Advanced Novice 1
760
- 66 no 3.87 Novice Beginner 1
761
- 60 no 4.00 Advanced Novice 1
762
- 62 no 3.70 Advanced Advanced 1
763
- >>>
764
- >>> idf = td_intersect([df1, df2], allow_duplicates=False)
765
- >>> idf
766
- masters gpa stats programming admitted
767
- id
768
- 64 yes 3.81 Advanced Advanced 1
769
- 60 no 4.00 Advanced Novice 1
770
- 58 no 3.13 Advanced Advanced 1
771
- 68 no 1.87 Advanced Novice 1
772
- 66 no 3.87 Novice Beginner 1
773
- 62 no 3.70 Advanced Advanced 1
774
- >>> # intersecting more than two DataFrames
775
- >>> df3 = df1[df1.gpa <= 3.5]
776
- >>> df3
777
- masters gpa stats programming admitted
778
- id
779
- 58 no 3.13 Advanced Advanced 1
780
- 67 yes 3.46 Novice Beginner 0
781
- 54 yes 3.50 Beginner Advanced 1
782
- 68 no 1.87 Advanced Novice 1
783
- 53 yes 3.50 Beginner Novice 1
784
- >>> idf = td_intersect([df1, df2, df3])
785
- >>> idf
786
- masters gpa stats programming admitted
787
- id
788
- 58 no 3.13 Advanced Advanced 1
789
- 68 no 1.87 Advanced Novice 1
790
-
791
- # Perform intersection of two GeoDataFrames.
792
- >>> geo_dataframe = GeoDataFrame('sample_shapes')
793
- >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
794
- >>> geo_dataframe1
795
-
796
- skey linestrings
797
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
798
- >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
799
- >>> geo_dataframe2
800
-
801
- skey linestrings
802
- 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
803
- 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
804
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
805
- 1002 LINESTRING (1 3,3 0,0 1)
806
- 1001 LINESTRING (1 1,2 2,3 3,4 4)
807
- 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
808
- 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
809
- 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
810
- 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
811
- >>> td_intersect([geo_dataframe1,geo_dataframe2])
812
-
813
- skey linestrings
814
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
815
- """
816
- awu_matrix = []
817
- awu_matrix.append(["df_list", df_list, False, (list)])
818
- awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
819
- setop_type = 'td_intersect'
820
- operation = 'intersect'
821
-
822
- # Validate Set operator arguments
823
- __validate_setop_args(df_list, awu_matrix, setop_type)
824
-
825
- return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
826
-
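By analogy with concat's choice of 'unionall' versus 'union' above, the allow_duplicates flag selects the ALL variant of the SQL set operator. Assuming that mapping, the intersect of the two example frames resolves to SQL of roughly this shape (illustrative only; not generated by the library):

```python
# Illustrative only: the SQL shape behind td_intersect([df1, df2]).
sql_all = """
SELECT id, masters, gpa, stats, programming, admitted FROM setop_test1
INTERSECT ALL
SELECT id, masters, gpa, stats, programming, admitted FROM setop_test2
"""  # allow_duplicates=True
sql_distinct = sql_all.replace("INTERSECT ALL", "INTERSECT")  # allow_duplicates=False
```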
827
- def td_minus(df_list, allow_duplicates=True):
828
- """
829
- DESCRIPTION:
830
- This function returns the rows that appear in the first teradataml DataFrame or GeoDataFrame
831
- and not in the other teradataml DataFrames or GeoDataFrames along the index axis.
832
- Note:
833
- This function should be applied to data frames of the same type: either all teradataml DataFrames,
834
- or all GeoDataFrames.
835
-
836
- PARAMETERS:
837
- df_list:
838
- Required argument.
839
- Specifies the list of teradataml DataFrames or GeoDataFrames on which the minus
840
- operation is to be performed.
841
- Types: list of teradataml DataFrames or GeoDataFrames
842
-
843
- allow_duplicates:
844
- Optional argument.
845
- Specifies if the result of minus operation can have duplicate rows.
846
- Default value: True
847
- Types: bool
848
-
849
- RETURNS:
850
- teradataml DataFrame when operation is performed on teradataml DataFrames.
851
- teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.
852
-
853
- RAISES:
854
- TeradataMlException, TypeError
855
-
856
- EXAMPLES:
857
- >>> from teradataml import load_example_data
858
- >>> load_example_data("dataframe", "setop_test1")
859
- >>> load_example_data("dataframe", "setop_test2")
860
- >>> load_example_data("geodataframe", ["sample_shapes"])
861
- >>> from teradataml.dataframe.setop import td_minus
862
- >>>
863
- >>> df1 = DataFrame('setop_test1')
864
- >>> df1
865
- masters gpa stats programming admitted
866
- id
867
- 62 no 3.70 Advanced Advanced 1
868
- 53 yes 3.50 Beginner Novice 1
869
- 69 no 3.96 Advanced Advanced 1
870
- 61 yes 4.00 Advanced Advanced 1
871
- 58 no 3.13 Advanced Advanced 1
872
- 51 yes 3.76 Beginner Beginner 0
873
- 68 no 1.87 Advanced Novice 1
874
- 66 no 3.87 Novice Beginner 1
875
- 60 no 4.00 Advanced Novice 1
876
- 59 no 3.65 Novice Novice 1
877
- >>> df2 = DataFrame('setop_test2')
878
- >>> df2
879
- masters gpa stats programming admitted
880
- id
881
- 12 no 3.65 Novice Novice 1
882
- 15 yes 4.00 Advanced Advanced 1
883
- 14 yes 3.45 Advanced Advanced 0
884
- 20 yes 3.90 Advanced Advanced 1
885
- 18 yes 3.81 Advanced Advanced 1
886
- 17 no 3.83 Advanced Advanced 1
887
- 13 no 4.00 Advanced Novice 1
888
- 11 no 3.13 Advanced Advanced 1
889
- 60 no 4.00 Advanced Novice 1
890
- 19 yes 1.98 Advanced Advanced 0
891
- >>> idf = td_minus([df1[df1.id<55] , df2])
892
- >>> idf
893
- masters gpa stats programming admitted
894
- id
895
- 51 yes 3.76 Beginner Beginner 0
896
- 50 yes 3.95 Beginner Beginner 0
897
- 54 yes 3.50 Beginner Advanced 1
898
- 52 no 3.70 Novice Beginner 1
899
- 53 yes 3.50 Beginner Novice 1
900
- 53 yes 3.50 Beginner Novice 1
901
- >>>
902
- >>> idf = td_minus([df1[df1.id<55] , df2], allow_duplicates=False)
903
- >>> idf
904
- masters gpa stats programming admitted
905
- id
906
- 54 yes 3.50 Beginner Advanced 1
907
- 51 yes 3.76 Beginner Beginner 0
908
- 53 yes 3.50 Beginner Novice 1
909
- 50 yes 3.95 Beginner Beginner 0
910
- 52 no 3.70 Novice Beginner 1
911
- >>> # applying minus on more than two DataFrames
912
- >>> df3 = df1[df1.gpa <= 3.9]
913
- >>> idf = td_minus([df1, df2, df3])
914
- >>> idf
915
- masters gpa stats programming admitted
916
- id
917
- 61 yes 4.00 Advanced Advanced 1
918
- 50 yes 3.95 Beginner Beginner 0
919
- 69 no 3.96 Advanced Advanced 1
920
-
921
- # td_minus on GeoDataFrame
922
- >>> geo_dataframe = GeoDataFrame('sample_shapes')
923
- >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
924
- >>> geo_dataframe1
925
- skey linestrings
926
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
927
-
928
- >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
929
- >>> geo_dataframe2
930
- skey linestrings
931
- 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
932
- 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
933
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
934
- 1002 LINESTRING (1 3,3 0,0 1)
935
- 1001 LINESTRING (1 1,2 2,3 3,4 4)
936
- 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
937
- 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
938
- 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
939
- 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
940
-
941
- >>> td_minus([geo_dataframe2,geo_dataframe1])
942
- linestrings
943
- skey
944
- 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
945
- 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
946
- 1002 LINESTRING (1 3,3 0,0 1)
947
- 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
948
- 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
949
- 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
950
- 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
951
- 1001 LINESTRING (1 1,2 2,3 3,4 4)
952
- """
953
- awu_matrix = []
954
- awu_matrix.append(["df_list", df_list, False, (list)])
955
- awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
956
- setop_type = 'td_except' if (inspect.stack()[1][3]) == 'td_except' else 'td_minus'
957
- operation = 'minus'
958
-
959
- # Validate Set operator arguments
960
- __validate_setop_args(df_list, awu_matrix, setop_type)
961
-
962
- return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
963
-
964
- def td_except(df_list, allow_duplicates=True):
965
- """
966
- DESCRIPTION:
967
- This function returns the resulting rows that appear in the first teradataml DataFrame or GeoDataFrame
968
- and not in other teradataml DataFrames or GeoDataFrames along the index axis.
969
- Note:
970
- This function should be applied to data frames of the same type: either all teradataml DataFrames,
971
- or all GeoDataFrames.
972
-
973
- PARAMETERS:
974
- df_list:
975
- Required argument.
976
- Specifies the list of teradataml DataFrames or GeoDataFrames on which the except
977
- operation is to be performed.
978
- Types: list of teradataml DataFrames or GeoDataFrames
979
-
980
- allow_duplicates:
981
- Optional argument.
982
- Specifies if the result of except operation can have duplicate rows.
983
- Default value: True
984
- Types: bool
985
-
986
- RETURNS:
987
- teradataml DataFrame when operation is performed on teradataml DataFrames.
988
- teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.
989
-
990
- RAISES:
991
- TeradataMlException, TypeError
992
-
993
- EXAMPLES:
994
- >>> from teradataml import load_example_data
995
- >>> load_example_data("dataframe", "setop_test1")
996
- >>> load_example_data("dataframe", "setop_test2")
997
- >>> load_example_data("geodataframe", ["sample_shapes"])
998
- >>> from teradataml.dataframe.setop import td_except
999
- >>>
1000
- >>> df1 = DataFrame('setop_test1')
1001
- >>> df1
1002
- masters gpa stats programming admitted
1003
- id
1004
- 62 no 3.70 Advanced Advanced 1
1005
- 53 yes 3.50 Beginner Novice 1
1006
- 69 no 3.96 Advanced Advanced 1
1007
- 61 yes 4.00 Advanced Advanced 1
1008
- 58 no 3.13 Advanced Advanced 1
1009
- 51 yes 3.76 Beginner Beginner 0
1010
- 68 no 1.87 Advanced Novice 1
1011
- 66 no 3.87 Novice Beginner 1
1012
- 60 no 4.00 Advanced Novice 1
1013
- 59 no 3.65 Novice Novice 1
1014
- >>> df2 = DataFrame('setop_test2')
1015
- >>> df2
1016
- masters gpa stats programming admitted
1017
- id
1018
- 12 no 3.65 Novice Novice 1
1019
- 15 yes 4.00 Advanced Advanced 1
1020
- 14 yes 3.45 Advanced Advanced 0
1021
- 20 yes 3.90 Advanced Advanced 1
1022
- 18 yes 3.81 Advanced Advanced 1
1023
- 17 no 3.83 Advanced Advanced 1
1024
- 13 no 4.00 Advanced Novice 1
1025
- 11 no 3.13 Advanced Advanced 1
1026
- 60 no 4.00 Advanced Novice 1
1027
- 19 yes 1.98 Advanced Advanced 0
1028
- >>> idf = td_except([df1[df1.id<55] , df2])
1029
- >>> idf
1030
- masters gpa stats programming admitted
1031
- id
1032
- 51 yes 3.76 Beginner Beginner 0
1033
- 50 yes 3.95 Beginner Beginner 0
1034
- 54 yes 3.50 Beginner Advanced 1
1035
- 52 no 3.70 Novice Beginner 1
1036
- 53 yes 3.50 Beginner Novice 1
1037
- 53 yes 3.50 Beginner Novice 1
1038
- >>>
1039
- >>> idf = td_except([df1[df1.id<55] , df2], allow_duplicates=False)
1040
- >>> idf
1041
- masters gpa stats programming admitted
1042
- id
1043
- 54 yes 3.50 Beginner Advanced 1
1044
- 51 yes 3.76 Beginner Beginner 0
1045
- 53 yes 3.50 Beginner Novice 1
1046
- 50 yes 3.95 Beginner Beginner 0
1047
- 52 no 3.70 Novice Beginner 1
1048
- >>> # applying except on more than two DataFrames
1049
- >>> df3 = df1[df1.gpa <= 3.9]
1050
- >>> idf = td_except([df1, df2, df3])
1051
- >>> idf
1052
- masters gpa stats programming admitted
1053
- id
1054
- 61 yes 4.00 Advanced Advanced 1
1055
- 50 yes 3.95 Beginner Beginner 0
1056
- 69 no 3.96 Advanced Advanced 1
1057
-
1058
- # td_except on GeoDataFrames
1059
- >>> geo_dataframe = GeoDataFrame('sample_shapes')
1060
- >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
1061
- >>> geo_dataframe1
1062
- skey linestrings
1063
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
1064
-
1065
- >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
1066
- >>> geo_dataframe2
1067
- skey linestrings
1068
- 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
1069
- 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
1070
- 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
1071
- 1002 LINESTRING (1 3,3 0,0 1)
1072
- 1001 LINESTRING (1 1,2 2,3 3,4 4)
1073
- 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
1074
- 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
1075
- 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
1076
- 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
1077
-
1078
- >>> td_except([geo_dataframe2,geo_dataframe1])
1079
- skey linestrings
1080
- 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
1081
- 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
1082
- 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
1083
- 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
1084
- 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
1085
- 1001 LINESTRING (1 1,2 2,3 3,4 4)
1086
- 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
1087
- 1002 LINESTRING (1 3,3 0,0 1)
1088
- """
1089
- return td_minus(df_list, allow_duplicates)
1090
-
1091
- def __process_setop_operation(df_list, allow_duplicates, setop_type, operation):
1092
- """
1093
- DESCRIPTION:
1094
- Internal function to process set operation and return the result DataFrame/GeoDataFrame.
1095
-
1096
- PARAMETERS:
1097
- df_list:
1098
- Required argument.
1099
- Specifies the list of teradataml DataFrames/GeoDataFrames on which the set
1100
- operation is to be performed.
1101
- Types: list of teradataml DataFrames
1102
-
1103
- allow_duplicates:
1104
- Optional argument.
1105
- Specifies if the result of the set operation can have duplicate rows.
1106
- Default value: True
1107
- Types: bool
1108
-
1109
- setop_type:
1110
- Required argument.
1111
- Specifies the set operation.
1112
- Types: str
1113
-
1114
- operation:
1115
- Required argument.
1116
- Specifies the set operation name.
1117
- Types: str
1118
-
1119
- RETURNS:
1120
- teradataml DataFrame/GeoDataFrame
1121
-
1122
- RAISES:
1123
- TeradataMlException
1124
-
1125
- EXAMPLES:
1126
- >>> __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
1127
-
1128
- """
1129
-
1130
- # Check if set operation can be lazy or not
1131
- is_lazy = __check_setop_if_lazy(df_list)
1132
-
1133
- # Get the first DataFrame's metaexpr
1134
- first_df_metaexpr = df_list[0]._metaexpr
1135
-
1136
- try:
1137
- aed_utils = AedUtils()
1138
- input_table_columns = []
1139
- for i in range(len(df_list)):
1140
- col_list = []
1141
- for j in range(len(df_list[i].columns)):
1142
- col_list.append(UtilFuncs._process_for_teradata_keyword(df_list[i].columns[j]))
1143
-
1144
- input_table_columns.append(','.join(col_list))
1145
-
1146
- nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
1147
- '{}all'.format(operation) if allow_duplicates else '{}'.format(operation),
1148
- input_table_columns)
1149
-
1150
- # Set the index_label to columns in first df's index_label if it is not None,
1151
- # else set it to None i.e. no index_label.
1152
- index_label = []
1153
- index_to_use = None
1154
- index_to_use = df_list[0]._index_label if df_list[0]._index_label is not None else None
1155
-
1156
- if index_to_use is not None:
1157
- index_label = index_to_use
1158
-
1159
- class_name = df_list[0].__class__.__name__
1160
- return __process_operation(first_df_metaexpr, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name)
1161
-
1162
- except TeradataMlException:
1163
- raise
1164
- except Exception as err:
1165
- raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
1166
- MessageCodes.SETOP_FAILED) from err
1
+ #!/usr/bin/python
2
+ # ##################################################################
3
+ #
4
+ # Copyright 2019 Teradata. All rights reserved.
5
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
6
+ #
7
+ # Primary Owner: Rohit Khurd (rohit.khurd@teradata.com)
8
+ # Secondary Owner: Abhinav Sahu (abhinav.sahu@teradata.com)
9
+ #
10
+ # This file implements APIs and utility functions for set operations.
11
+ # ##################################################################
12
+
13
+ import inspect
+ import importlib
14
+ from collections import OrderedDict
15
+ from teradataml.common.exceptions import TeradataMlException
16
+ from teradataml.common.messages import Messages
17
+ from teradataml.common.messagecodes import MessageCodes
18
+ from teradataml.common.utils import UtilFuncs
19
+ from teradataml.dataframe import dataframe
20
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
21
+ from teradataml.common.aed_utils import AedUtils
22
+ from teradataml.utils.validators import _Validators
23
+ from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
24
+ from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
25
+ from teradatasql import OperationalError
26
+
27
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband
28
+
29
+ module = importlib.import_module("teradataml")
30
+
31
+ def __validate_setop_args(df_list, awu_matrix, setop_type):
32
+ """
33
+ DESCRIPTION:
34
+ Internal function to check for the validity of the input arguments.
35
+
36
+ PARAMETERS:
37
+ df_list:
38
+ Required argument.
39
+ Specifies the list of teradataml DataFrames.
40
+ Types: list of teradataml DataFrames
41
+
42
+ awu_matrix:
43
+ Required argument.
44
+ Specifies the list of argument information entries to validate; the expected
45
+ types are mentioned as a type or a tuple of types.
46
+
47
+ setop_type:
48
+ Required argument.
49
+ Specifies the type of SET Operation to be performed.
50
+ Types: str
51
+
52
+ RAISES:
53
+ TeradataMlException
54
+
55
+ EXAMPLES:
56
+ __validate_setop_args(df_list, awu_matrix, setop_type)
57
+
58
+ """
59
+ # Validate argument types
60
+ _Validators._validate_function_arguments(awu_matrix)
61
+
62
+ # Validate the number of dfs in df_list
63
+ if len(df_list) < 2:
64
+ raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_INVALID_DF_COUNT,
65
+ setop_type),
66
+ MessageCodes.SETOP_INVALID_DF_COUNT)
67
+
68
+ # Validate if all items in df_list are DataFrames
69
+ for i in range(len(df_list)):
70
+ _Validators._validate_function_arguments([['df_list[{0}]'.format(i), df_list[i],
71
+ False, (dataframe.DataFrame)]])
72
+
73
+ # Validate that all DataFrames have the same number of columns for 'td_intersect', 'td_minus' and 'td_except'
74
+ if setop_type in ['td_intersect', 'td_minus', 'td_except']:
75
+ it = iter(df_list[i].columns for i in range(len(df_list)))
76
+ the_len = len(next(it))
77
+ if not all(len(l) == the_len for l in it):
78
+ raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_DF_LENGTH),
79
+ MessageCodes.INVALID_DF_LENGTH)
80
+
81
+
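The column-count validation above relies on a compact iterator idiom: take the length of the first DataFrame's column list, then confirm that every remaining list matches it. A minimal, self-contained sketch of that idiom, with plain Python lists standing in for each DataFrame's .columns (an illustrative assumption only):

# Standalone sketch of the equal-length check in __validate_setop_args;
# the plain lists below stand in for the .columns of each teradataml DataFrame.
def all_same_length(column_lists):
    it = iter(column_lists)
    the_len = len(next(it))  # length of the first column list
    return all(len(cols) == the_len for cols in it)

print(all_same_length([['a', 'b'], ['x', 'y']]))       # True
print(all_same_length([['a', 'b'], ['x', 'y', 'z']]))  # False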
82
+ def __check_concat_compatibility(df_list, join, sort, ignore_index):
83
+ """
84
+ DESCRIPTION:
85
+ Internal function to check if the DataFrames are compatible for concat or not.
86
+
87
+ PARAMETERS:
88
+ df_list:
89
+ Required argument.
90
+ Specifies the list of teradataml DataFrames to be concatenated.
91
+ Types: list of teradataml DataFrames
92
+
93
+ join:
94
+ Required argument.
95
+ Specifies the type of join to use in concat ('inner' or 'outer').
96
+ Types: str
97
+
98
+ sort:
99
+ Required argument.
100
+ Specifies a flag to determine whether the columns should be sorted while being projected.
101
+ Types: bool
102
+
103
+ ignore_index:
104
+ Required argument.
105
+ Specifies whether to ignore the index columns in resulting DataFrame or not.
106
+ Types: bool
107
+
108
+ RETURNS:
109
+ A tuple of the following form:
110
+ (master_column_dict, is_lazy)
111
+
112
+ where master_column_dict is a dictionary whose keys are the column names to project in the result,
113
+ and is of the following form:
114
+ {
115
+ '<col_name_1>' : {
116
+ 'col_present' : [True, False],
117
+ 'col_type': <type>
118
+ },
119
+ '<col_name_2>' : {
120
+ ...
121
+ },
122
+ ...
123
+ }
124
+
125
+ The value of each key in the dictionary is again a dictionary with the following elements:
126
+ 1. 'col_present': A list of booleans, the nth value in it indicating the column's presence in the nth DF.
127
+ Presence specified by True, and absence by False,
128
+ 2. 'col_type': The teradatasqlalchemy datatype of the column in the first DF that the column is present in,
129
+
130
+ and 'is_lazy' is a boolean which indicates whether the result DataFrame creation should be a lazy operation
131
+ or not, based on the column type compatibility.
132
+
133
+ RAISES:
134
+ None
135
+
136
+ EXAMPLES:
137
+ columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort)
138
+ """
139
+ dfs_to_operate_on = df_list
140
+
141
+ # Initialize the return objects including a variable deciding whether the execution is lazy or not.
142
+ # The execution will be non-lazy if the types of columns are not an exact match.
143
+ # TODO: Add a set operation type compatibility matrix for use to make this operation completely lazy
144
+ # https://jira.td.teradata.com/jira/browse/ELE-1913
145
+
146
+ col_dict = OrderedDict()
147
+ is_lazy = True
148
+
149
+ # Iterate on all DFs to be applied for set operation.
150
+ for df in dfs_to_operate_on:
151
+ # Process each column in the DF of the iteration.
152
+ for c in df._metaexpr.t.c:
153
+ col_name = c.name
154
+ # Process the column name if it is not already processed.
155
+ # Processing of set operation is column-name based, so if the DF in the nth iteration had column 'xyz',
156
+ # then the column with the same name in any DF in later iterations need not be processed.
157
+ if col_name not in col_dict:
158
+ # For every column, its entry in the dictionary looks like:
159
+ # '<column_name>' : { 'col_present' : [True, False], 'col_type': <type> }
160
+ # where :
161
+ # '<column_name>' : is the name of the column being processed.
162
+ #
163
+ # Its value is yet another dictionary with keys:
164
+ # 'col_present' : Its value is a list of booleans, the nth value in it indicating the
165
+ # column's presence in the nth DF - presence specified by True,
166
+ # and absence by False.
167
+ # 'col_type' : Its value is the teradatasqlalchemy type of the column in the first DF
168
+ # that the column is present in.
169
+
170
+ # Generate a list of booleans, each value indicating the column's presence in the corresponding DF in the
171
+ # dfs_to_operate_on list. If ignore_index is True, assign False so that the column is ignored when
172
+ # forming the dict.
173
+
174
+ col_present_in_dfs = []
175
+ for inner_df in dfs_to_operate_on:
176
+ col_present_in_df = None
177
+ if ignore_index and inner_df.index and col_name in inner_df._index_label:
178
+ col_present_in_df = False
179
+ else:
180
+ col_present_in_df = df_utils._check_column_exists(col_name, inner_df.columns)
181
+ col_present_in_dfs.append(col_present_in_df)
182
+
183
+ if join.upper() == 'INNER':
184
+ # For an inner join, the column has to be present in all DFs.
185
+ if all(col_present_in_dfs):
186
+ col_dict[col_name] = {}
187
+
188
+ # Get the type of the column in all the DFs.
189
+ col_types_in_dfs = [inner_df._metaexpr.t.c[col_name].type for inner_df in
190
+ dfs_to_operate_on]
191
+
192
+ # Populate the 'col_present' list using the col_present_in_dfs.
193
+ col_dict[col_name]['col_present'] = col_present_in_dfs
194
+ # The type to be used for the column is the one of the first DF it is present in.
195
+ col_dict[col_name]['col_type'] = col_types_in_dfs[0]
196
+
197
+ # If the type of the column in all DFs is not the same, then the operation is not lazy.
198
+ if not all(ctype == col_dict[col_name]['col_type']
199
+ for ctype in col_types_in_dfs):
200
+ is_lazy = False
201
+
202
+ elif join.upper() == 'OUTER':
203
+ # If the column is absent from all the DataFrames, do nothing.
204
+ if not any(col_present_in_dfs):
205
+ pass
206
+ else:
207
+ # For an outer join, the column need not be present in all DFs.
208
+ col_dict[col_name] = {}
209
+ # Get the type of the column in all the DFs. None for the DF it is not present in.
210
+ col_types_in_dfs = [None if not present else inner_df._metaexpr.t.c[col_name].type
211
+ for (inner_df, present) in zip(dfs_to_operate_on, col_present_in_dfs)]
212
+
213
+ # Find the type of the column in the first DF it is present in.
214
+ non_none_type_to_add = next(ctype for ctype in col_types_in_dfs if ctype is not None)
215
+
216
+ # Populate the 'col_present' list using the col_present_in_dfs.
217
+ col_dict[col_name]['col_present'] = col_present_in_dfs
218
+ # The type to be used for the column is the one of the first DF it is present in.
219
+ col_dict[col_name]['col_type'] = non_none_type_to_add
220
+
221
+ # If the type of the column in all DFs is not the same, then the operation is not lazy.
222
+ if not all(True if ctype is None else ctype == non_none_type_to_add
223
+ for ctype in col_types_in_dfs):
224
+ is_lazy = False
225
+
226
+ # Sort if required
227
+ if sort and join.upper() == 'OUTER':
228
+ col_dict = OrderedDict(sorted(col_dict.items()))
229
+
230
+ # If the result has no columns, i.e. no data
231
+ if len(col_dict) < 1:
232
+ raise TeradataMlException(Messages.get_message(MessageCodes.DF_WITH_NO_COLUMNS),
233
+ MessageCodes.DF_WITH_NO_COLUMNS)
234
+
235
+ return col_dict, is_lazy
236
+
237
+
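To make the returned structure concrete, here is a plausible master_column_dict for an outer concat of two frames that share only some columns. The column names and the string type markers are illustrative assumptions; the real dictionary holds teradatasqlalchemy type instances, not strings.

from collections import OrderedDict

# Hypothetical __check_concat_compatibility result for df1(id, stats, gpa)
# and df2(id, stats, admitted) with join='outer'; types are shown as plain
# strings purely for readability.
master_column_dict = OrderedDict([
    ('id',       {'col_present': [True, True],  'col_type': 'INTEGER'}),
    ('stats',    {'col_present': [True, True],  'col_type': 'VARCHAR'}),
    ('gpa',      {'col_present': [True, False], 'col_type': 'FLOAT'}),
    ('admitted', {'col_present': [False, True], 'col_type': 'INTEGER'}),
])
is_lazy = True  # every shared column had a matching type in this sketch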
238
+ def __check_setop_if_lazy(df_list):
239
+ """
240
+ DESCRIPTION:
241
+ Internal function to check if the teradataml DataFrames column types are compatible for
242
+ any set operation or not.
243
+
244
+ PARAMETERS:
245
+ df_list:
246
+ Required argument.
247
+ Specifies the list of teradataml DataFrames.
248
+ Types: list of teradataml DataFrames
249
+
250
+ RETURNS:
251
+ A boolean 'is_lazy' which indicates whether the result DataFrame creation should be a
252
+ lazy operation or not.
253
+
254
+ RAISES:
255
+ None
256
+
257
+ EXAMPLES:
258
+ is_lazy = __check_setop_if_lazy(df_list)
259
+ """
260
+
261
+ # Initialize the return variable deciding whether the execution is lazy or not.
262
+ # The execution will be non-lazy if the types of columns are not an exact match.
263
+ is_lazy = True
264
+
265
+ # Take the first df's column metadata and iterate over its column names, which
266
+ # are the ones projected for any set operation.
267
+ for i, col in enumerate(df_list[0]._metaexpr.t.c):
268
+ for k in range(1, len(df_list)):
269
+ next_df_cols = df_list[k].columns
270
+ next_df_type = df_list[k]._metaexpr.t.c[next_df_cols[i]].type
271
+ if type(next_df_type) != type(col.type):
272
+ is_lazy = False
273
+
274
+ return is_lazy
275
+
276
+
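The laziness test reduces to a positional type comparison: the nth column of every later DataFrame must have the same type as the nth column of the first. A reduced sketch of that comparison with stand-in type classes (assumptions for illustration; the real code compares teradatasqlalchemy column types):

# Stand-ins for teradatasqlalchemy type instances, for illustration only.
class INTEGER: pass
class VARCHAR: pass
class FLOAT: pass

first_df_types = [INTEGER(), VARCHAR()]
second_df_types = [INTEGER(), FLOAT()]

# Lazy only when the types match positionally across the frames.
is_lazy = all(type(a) == type(b)
              for a, b in zip(first_df_types, second_df_types))
print(is_lazy)  # False: VARCHAR vs FLOAT forces a non-lazy execution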
277
+ def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name=None):
278
+ """
279
+ DESCRIPTION:
280
+ Internal function to process the columns as per given nodeid and setop_type, and
281
+ return the result DataFrame.
282
+
283
+ PARAMETERS:
284
+ meta_data:
285
+ Required argument.
286
+ Specifies either a metaexpr for the first DataFrame or a dictionary with the
287
+ column names as dictionary keys to be projected as a result. If a dict, the value
288
+ of the keys in the dictionary is again a dictionary with the elements mentioning
289
+ column presence and its type.
290
+ Types: _MetaExpression, OrderedDict
291
+
292
+ is_lazy:
293
+ Required argument.
294
+ Specifies a boolean based on the column type compatibility, indicating
295
+ whether set operation is lazy or not.
296
+ Types: bool
297
+
298
+ setop_type:
299
+ Required argument.
300
+ Specifies the type of SET Operation to be performed.
301
+ Types: str
302
+
303
+ nodeid:
304
+ Required argument.
305
+ Specifies the node id for the teradataml DataFrame.
306
+
307
+ index_label:
308
+ Required argument.
309
+ Specifies list of index columns for teradataml DataFrame.
310
+ Types: list
311
+
312
+ index_to_use:
313
+ Required argument.
314
+ Specifies column(s) which can also be part of final index_label list.
315
+ Types: list
316
+
317
+ class_name:
318
+ Optional argument.
319
+ Specifies the name of the class for the first dataframe for deciding the
320
+ return type of the output dataframe.
321
+ Default value: None
322
+ Types: str
323
+
324
+ RETURNS:
325
+ teradataml DataFrame
326
+
327
+ RAISES:
328
+ TeradataMlException
329
+
330
+ EXAMPLES:
331
+ >>> __process_operation(meta_data, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)
332
+
333
+ """
334
+
335
+ # Separate processing for concat and other set operators as concat has OrderedDict as metadata.
336
+ if setop_type == 'concat':
337
+ class_name = "DataFrame"
338
+ column_info = list((col_name, meta_data[col_name]['col_type']) for col_name in meta_data)
339
+ for col in column_info:
340
+ if isinstance(col[1], (GEOMETRY, MBR, MBB)):
341
+ class_name = "GeoDataFrame"
342
+ break
343
+
344
+ # Construct new metadata (_metaexpr) without the DB, using a dummy nodeid to get the new metaexpr for the node.
345
+ meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info) if is_lazy else meta_data
346
+
347
+ if is_lazy:
348
+ return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)
349
+ else:
350
+ try:
351
+ # Execute node and get table_name to build DataFrame on.
352
+ table_name = df_utils._execute_node_return_db_object_name(nodeid)
353
+ return getattr(module, class_name).from_table(table_name, index_label=index_to_use)
354
+ except TeradataMlException as err:
355
+ # We should be here only because of a failure in creating the DF
356
+ # due to incompatible types; a TeradataMlException is raised when DF creation fails.
357
+ raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_COL_TYPE_MISMATCH, setop_type),
358
+ MessageCodes.SETOP_COL_TYPE_MISMATCH) from err
359
+ except OperationalError:
360
+ raise
361
+
362
+
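The concat branch above picks the result class by scanning the projected column types for geometry types. A reduced sketch of that decision, again with stand-in type classes (illustrative assumptions, not the library's types):

# Stand-ins for the geometry types imported at the top of this module.
class GEOMETRY: pass
class MBR: pass
class MBB: pass
class VARCHAR: pass

def result_class_name(column_info):
    # column_info: (name, type_instance) pairs, as built in __process_operation.
    for _name, col_type in column_info:
        if isinstance(col_type, (GEOMETRY, MBR, MBB)):
            return "GeoDataFrame"
    return "DataFrame"

print(result_class_name([("skey", VARCHAR()), ("polygons", GEOMETRY())]))
# GeoDataFrame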
363
+ @collect_queryband(queryband="concat")
364
+ def concat(df_list, join='OUTER', allow_duplicates=True, sort=False, ignore_index=False):
365
+ """
366
+ DESCRIPTION:
367
+ Concatenates a list of teradataml DataFrames, GeoDataFrames, or both along the index axis.
368
+
369
+ PARAMETERS:
370
+ df_list:
371
+ Required argument.
372
+ Specifies a list of teradataml DataFrames, GeoDataFrames, or both on which the
373
+ concatenation is to be performed.
374
+ Types: list of teradataml DataFrames and/or GeoDataFrames
375
+
376
+ join:
377
+ Optional argument.
378
+ Specifies how to handle indexes on the columns axis.
379
+ Supported values are:
380
+ • 'OUTER': It instructs the function to project all columns from all the DataFrames.
381
+ Columns not present in a given DataFrame are filled with SQL NULL values.
382
+ • 'INNER': It instructs the function to project only the columns common to all DataFrames.
383
+ Default value: 'OUTER'
384
+ Permitted values: 'INNER', 'OUTER'
385
+ Types: str
386
+
387
+ allow_duplicates:
388
+ Optional argument.
389
+ Specifies if the result of concatenation can have duplicate rows.
390
+ Default value: True
391
+ Types: bool
392
+
393
+ sort:
394
+ Optional argument.
395
+ Specifies a flag to sort the columns axis if it is not already aligned when
396
+ the join argument is set to 'outer'.
397
+ Default value: False
398
+ Types: bool
399
+
400
+ ignore_index:
401
+ Optional argument.
402
+ Specifies whether to ignore the index columns in resulting DataFrame or not.
403
+ If True, then index columns will be ignored in the concat operation.
404
+ Default value: False
405
+ Types: bool
406
+
407
+ RETURNS:
408
+ teradataml DataFrame, if the result does not contain any geometry data; otherwise, teradataml GeoDataFrame.
409
+
410
+ RAISES:
411
+ TeradataMlException
412
+
413
+ EXAMPLES:
414
+ >>> from teradataml import load_example_data
415
+ >>> load_example_data("dataframe", "admissions_train")
416
+ >>> load_example_data("geodataframe", ["sample_shapes"])
417
+ >>> from teradataml.dataframe import concat
418
+ >>>
419
+ >>> # Default options
420
+ >>> df = DataFrame('admissions_train')
421
+ >>> df1 = df[df.gpa == 4].select(['id', 'stats', 'masters', 'gpa'])
422
+ >>> df1
423
+ stats masters gpa
424
+ id
425
+ 13 Advanced no 4.0
426
+ 29 Novice yes 4.0
427
+ 15 Advanced yes 4.0
428
+ >>> df2 = df[df.gpa < 2].select(['id', 'stats', 'programming', 'admitted'])
429
+ >>> df2
430
+ stats programming admitted
431
+ id
432
+ 24 Advanced Novice 1
433
+ 19 Advanced Advanced 0
434
+ >>> cdf = concat([df1, df2])
435
+ >>> cdf
436
+ stats masters gpa programming admitted
437
+ id
438
+ 19 Advanced None NaN Advanced 0
439
+ 24 Advanced None NaN Novice 1
440
+ 13 Advanced no 4.0 None None
441
+ 29 Novice yes 4.0 None None
442
+ 15 Advanced yes 4.0 None None
443
+ >>>
444
+ >>> # concat more than two DataFrames
445
+ >>> df3 = df[df.gpa == 3].select(['id', 'stats', 'programming', 'gpa'])
446
+ >>> df3
447
+ stats programming gpa
448
+ id
449
+ 36 Advanced Novice 3.0
450
+ >>> cdf = concat([df1, df2, df3])
451
+ >>> cdf
452
+ stats masters gpa programming admitted
453
+ id
454
+ 15 Advanced yes 4.0 None NaN
455
+ 19 Advanced None NaN Advanced 0.0
456
+ 36 Advanced None 3.0 Novice NaN
457
+ 29 Novice yes 4.0 None NaN
458
+ 13 Advanced no 4.0 None NaN
459
+ 24 Advanced None NaN Novice 1.0
460
+
461
+ >>> # join = 'inner'
462
+ >>> cdf = concat([df1, df2], join='inner')
463
+ >>> cdf
464
+ stats
465
+ id
466
+ 19 Advanced
467
+ 24 Advanced
468
+ 13 Advanced
469
+ 29 Novice
470
+ 15 Advanced
471
+ >>>
472
+ >>> # allow_duplicates = True (default)
473
+ >>> cdf = concat([df1, df2])
474
+ >>> cdf
475
+ stats masters gpa programming admitted
476
+ id
477
+ 19 Advanced None NaN Advanced 0
478
+ 24 Advanced None NaN Novice 1
479
+ 13 Advanced no 4.0 None None
480
+ 29 Novice yes 4.0 None None
481
+ 15 Advanced yes 4.0 None None
482
+ >>> cdf = concat([cdf, df2])
483
+ >>> cdf
484
+ stats masters gpa programming admitted
485
+ id
486
+ 19 Advanced None NaN Advanced 0
487
+ 13 Advanced no 4.0 None None
488
+ 24 Advanced None NaN Novice 1
489
+ 24 Advanced None NaN Novice 1
490
+ 19 Advanced None NaN Advanced 0
491
+ 29 Novice yes 4.0 None None
492
+ 15 Advanced yes 4.0 None None
493
+ >>>
494
+ >>> # allow_duplicates = False
495
+ >>> cdf = concat([cdf, df2], allow_duplicates=False)
496
+ >>> cdf
497
+ stats masters gpa programming admitted
498
+ id
499
+ 19 Advanced None NaN Advanced 0
500
+ 29 Novice yes 4.0 None None
501
+ 24 Advanced None NaN Novice 1
502
+ 15 Advanced yes 4.0 None None
503
+ 13 Advanced no 4.0 None None
504
+ >>>
505
+ >>> # sort = True
506
+ >>> cdf = concat([df1, df2], sort=True)
507
+ >>> cdf
508
+ admitted gpa masters programming stats
509
+ id
510
+ 19 0 NaN None Advanced Advanced
511
+ 24 1 NaN None Novice Advanced
512
+ 13 None 4.0 no None Advanced
513
+ 29 None 4.0 yes None Novice
514
+ 15 None 4.0 yes None Advanced
515
+ >>>
516
+ >>> # ignore_index = True
517
+ >>> cdf = concat([df1, df2], ignore_index=True)
518
+ >>> cdf
519
+ stats masters gpa programming admitted
520
+ 0 Advanced yes 4.0 None NaN
521
+ 1 Advanced None NaN Advanced 0.0
522
+ 2 Novice yes 4.0 None NaN
523
+ 3 Advanced None NaN Novice 1.0
524
+ 4 Advanced no 4.0 None NaN
525
+
526
+ # Perform concatenation of two GeoDataFrames
527
+ >>> geo_dataframe = GeoDataFrame('sample_shapes')
528
+ >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
529
+ >>> geo_dataframe1
530
+
531
+ skey linestrings
532
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
533
+
534
+ >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','polygons'])
535
+ >>> geo_dataframe2
536
+
537
+ skey polygons
538
+ 1009 MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
539
+ 1005 POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
540
+ 1004 POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
541
+ 1002 POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
542
+ 1001 POLYGON ((0 0,0 20,20 20,20 0,0 0))
543
+ 1003 POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
544
+ 1007 MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
545
+ 1006 POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
546
+ 1008 MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))
547
+
548
+
549
+ >>> concat([geo_dataframe1,geo_dataframe2])
550
+
551
+ skey linestrings polygons
552
+ 1009 None MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
553
+ 1005 None POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
554
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80) None
555
+ 1004 None POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
556
+ 1003 None POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
557
+ 1001 None POLYGON ((0 0,0 20,20 20,20 0,0 0))
558
+ 1002 None POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
559
+ 1007 None MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
560
+ 1006 None POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
561
+ 1008 None MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))
562
+
563
+ # Perform concatenation of a DataFrame and GeoDataFrame which returns a GeoDataFrame.
564
+ >>> normal_df=df.select(['id','stats'])
565
+ >>> normal_df
566
+ stats
567
+ id
568
+ 34 Advanced
569
+ 32 Advanced
570
+ 11 Advanced
571
+ 40 Novice
572
+ 38 Advanced
573
+ 36 Advanced
574
+ 7 Novice
575
+ 26 Advanced
576
+ 19 Advanced
577
+ 13 Advanced
578
+ >>> geo_df = geo_dataframe[geo_dataframe.skey < 1010].select(['skey', 'polygons'])
579
+ >>> geo_df
580
+
581
+ skey polygons
582
+ 1003 POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
583
+ 1008 MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))
584
+ 1006 POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
585
+ 1009 MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
586
+ 1005 POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
587
+ 1007 MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
588
+ 1001 POLYGON ((0 0,0 20,20 20,20 0,0 0))
589
+ 1002 POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
590
+ 1004 POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
591
+
592
+ >>> idf = concat([normal_df, geo_df])
593
+ >>> idf
594
+ stats skey polygons
595
+ id
596
+ 38 Advanced None None
597
+ 7 Novice None None
598
+ 26 Advanced None None
599
+ 17 Advanced None None
600
+ 34 Advanced None None
601
+ 13 Advanced None None
602
+ 32 Advanced None None
603
+ 11 Advanced None None
604
+ 15 Advanced None None
605
+ 36 Advanced None None
606
+ >>>
607
+ """
608
+ concat_join_permitted_values = ['INNER', 'OUTER']
609
+
610
+ # The matrix below is a list of lists, where each row contains the following elements:
611
+ # Let's take an example to get an idea:
612
+ # [element1, element2, element3, element4, element5, element6]
613
+ # e.g.
614
+ # ["join", join, True, (str), True, concat_join_permitted_values]
615
+
616
+ # 1. element1 --> Argument Name, a string. ["join" in above example.]
617
+ # 2. element2 --> Argument itself. [join]
618
+ # 3. element3 --> Specifies a flag that mentions whether the argument is optional or not.
619
+ # False means required and True means optional.
620
+ # 4. element4 --> Tuple of accepted types. (str) in above example.
621
+ # 5. element5 --> True means validate for an empty value. An error will be raised if an empty value is passed.
622
+ # If not specified, it is the same as specifying False.
623
+ # 6. element6 --> A list of permitted values that the argument can accept.
624
+ # If not specified, it is as good as passing None. If a list is passed, validation will be
625
+ # performed for permitted values.
626
+ awu_matrix = []
627
+ awu_matrix.append(["df_list", df_list, False, (list)])
628
+ awu_matrix.append(["join", join, True, (str), True, concat_join_permitted_values])
629
+ awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
630
+ awu_matrix.append(["sort", sort, False, (bool)])
631
+ awu_matrix.append(["ignore_index", ignore_index, False, (bool)])
632
+ setop_type = 'concat'
633
+
634
+ # Validate Set operator arguments
635
+ __validate_setop_args(df_list, awu_matrix, setop_type)
636
+
637
+ # Generate the columns and their type to output, and check if the evaluation has to be lazy
638
+ master_columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort, ignore_index)
639
+
640
+ try:
641
+ aed_utils = AedUtils()
642
+
643
+ # Set the index_label to the columns in the first df's index_label if they are being projected,
644
+ # else set it to the columns in the second df's index_label if they are being projected, and so on until the last df.
645
+ # Finally, leave it as None if no df's index_label is being projected.
646
+ index_label = None
647
+ index_to_use = None
648
+ for df in df_list:
649
+ if df._index_label is not None and any(ind_col in master_columns_dict for ind_col in df._index_label):
650
+ index_label = []
651
+ index_to_use = df._index_label
652
+ break
653
+
654
+ if index_to_use is not None:
655
+ for ind_col in index_to_use:
656
+ if ind_col in master_columns_dict:
657
+ index_label.append(ind_col)
658
+
659
+ # Reset the index labels if 'ignore_index' is set to True; index columns were already excluded from master_columns_dict.
660
+ if ignore_index and index_to_use is not None:
661
+ index_label = None
662
+ index_to_use = None
663
+
664
+ col_list = []
665
+ for i in range(len(df_list)):
666
+ col_list.append([])
667
+
668
+ # Now create the list of columns for each DataFrame to concatenate
669
+ type_compiler = td_type_compiler(td_dialect)
670
+ for col_name, value in master_columns_dict.items():
671
+ for i in range(len(col_list)):
672
+ if not value['col_present'][i]:
673
+ col_list[i].append('CAST(NULL as {}) as {}'.format(type_compiler.process(value['col_type']),
674
+ UtilFuncs._teradata_quote_arg(col_name, "\"",
675
+ False)))
676
+ else:
677
+ col_name = UtilFuncs._process_for_teradata_keyword(col_name)
678
+ col_list[i].append(col_name)
679
+
680
+ input_table_columns = []
681
+ for i in range(len(col_list)):
682
+ input_table_columns.append(','.join(col_list[i]))
683
+
684
+ concat_nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
685
+ 'unionall' if allow_duplicates else 'union',
686
+ input_table_columns)
687
+ return __process_operation(master_columns_dict, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)
688
+
689
+ except TeradataMlException:
690
+ raise
691
+ except Exception as err:
692
+ raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
693
+ MessageCodes.SETOP_FAILED) from err
694
+
695
+
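Inside concat, a column missing from one input is projected as a typed NULL so that every SELECT arm of the union has the same shape. A rough sketch of the projection string being assembled; the helper name and the plain string type names are assumptions for illustration (the real code renders types with the Teradata type compiler and quotes names via UtilFuncs):

# Sketch of the per-DataFrame projection list built in concat: present
# columns are projected by name, absent ones as typed NULLs.
def projection_for_df(master_columns, df_index, type_names):
    cols = []
    for col_name, present in master_columns.items():
        if present[df_index]:
            cols.append(col_name)
        else:
            cols.append('CAST(NULL as {}) as "{}"'.format(
                type_names[col_name], col_name))
    return ','.join(cols)

master = {'id': [True, True], 'gpa': [True, False]}
types = {'id': 'INTEGER', 'gpa': 'FLOAT'}
print(projection_for_df(master, 1, types))
# id,CAST(NULL as FLOAT) as "gpa"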
696
+ @collect_queryband(queryband="tdIntersect")
697
+ def td_intersect(df_list, allow_duplicates=True):
698
+ """
699
+ DESCRIPTION:
700
+ This function intersects a list of teradataml DataFrames or GeoDataFrames along the index axis and
701
+ returns a DataFrame with rows common to all input DataFrames.
702
+ Note:
703
+ This function should be applied to data frames of the same type: either all teradataml DataFrames,
704
+ or all GeoDataFrames.
705
+
706
+ PARAMETERS:
707
+ df_list:
708
+ Required argument.
709
+ Specifies the list of teradataml DataFrames or GeoDataFrames on which the intersection is to be performed.
710
+ Types: list of teradataml DataFrames or GeoDataFrames
711
+
712
+ allow_duplicates:
713
+ Optional argument.
714
+ Specifies if the result of intersection can have duplicate rows.
715
+ Default value: True
716
+ Types: bool
717
+
718
+ RETURNS:
719
+ teradataml DataFrame when intersect is performed on teradataml DataFrames.
720
+ teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.
721
+
722
+ RAISES:
723
+ TeradataMlException, TypeError
724
+
725
+ EXAMPLES:
726
+ >>> from teradataml import load_example_data
727
+ >>> load_example_data("dataframe", "setop_test1")
728
+ >>> load_example_data("dataframe", "setop_test2")
729
+ >>> load_example_data("geodataframe", ["sample_shapes"])
730
+ >>> from teradataml.dataframe.setop import td_intersect
731
+ >>>
732
+ >>> df1 = DataFrame('setop_test1')
733
+ >>> df1
734
+ masters gpa stats programming admitted
735
+ id
736
+ 62 no 3.70 Advanced Advanced 1
737
+ 53 yes 3.50 Beginner Novice 1
738
+ 69 no 3.96 Advanced Advanced 1
739
+ 61 yes 4.00 Advanced Advanced 1
740
+ 58 no 3.13 Advanced Advanced 1
741
+ 51 yes 3.76 Beginner Beginner 0
742
+ 68 no 1.87 Advanced Novice 1
743
+ 66 no 3.87 Novice Beginner 1
744
+ 60 no 4.00 Advanced Novice 1
745
+ 59 no 3.65 Novice Novice 1
746
+ >>> df2 = DataFrame('setop_test2')
747
+ >>> df2
748
+ masters gpa stats programming admitted
749
+ id
750
+ 12 no 3.65 Novice Novice 1
751
+ 15 yes 4.00 Advanced Advanced 1
752
+ 14 yes 3.45 Advanced Advanced 0
753
+ 20 yes 3.90 Advanced Advanced 1
754
+ 18 yes 3.81 Advanced Advanced 1
755
+ 17 no 3.83 Advanced Advanced 1
756
+ 13 no 4.00 Advanced Novice 1
757
+ 11 no 3.13 Advanced Advanced 1
758
+ 60 no 4.00 Advanced Novice 1
759
+ 19 yes 1.98 Advanced Advanced 0
760
+ >>> idf = td_intersect([df1, df2])
761
+ >>> idf
762
+ masters gpa stats programming admitted
763
+ id
764
+ 64 yes 3.81 Advanced Advanced 1
765
+ 60 no 4.00 Advanced Novice 1
766
+ 58 no 3.13 Advanced Advanced 1
767
+ 68 no 1.87 Advanced Novice 1
768
+ 66 no 3.87 Novice Beginner 1
769
+ 60 no 4.00 Advanced Novice 1
770
+ 62 no 3.70 Advanced Advanced 1
771
+ >>>
772
+ >>> idf = td_intersect([df1, df2], allow_duplicates=False)
773
+ >>> idf
774
+ masters gpa stats programming admitted
775
+ id
776
+ 64 yes 3.81 Advanced Advanced 1
777
+ 60 no 4.00 Advanced Novice 1
778
+ 58 no 3.13 Advanced Advanced 1
779
+ 68 no 1.87 Advanced Novice 1
780
+ 66 no 3.87 Novice Beginner 1
781
+ 62 no 3.70 Advanced Advanced 1
782
+ >>> # intersecting more than two DataFrames
783
+ >>> df3 = df1[df1.gpa <= 3.5]
784
+ >>> df3
785
+ masters gpa stats programming admitted
786
+ id
787
+ 58 no 3.13 Advanced Advanced 1
788
+ 67 yes 3.46 Novice Beginner 0
789
+ 54 yes 3.50 Beginner Advanced 1
790
+ 68 no 1.87 Advanced Novice 1
791
+ 53 yes 3.50 Beginner Novice 1
792
+ >>> idf = td_intersect([df1, df2, df3])
793
+ >>> idf
794
+ masters gpa stats programming admitted
795
+ id
796
+ 58 no 3.13 Advanced Advanced 1
797
+ 68 no 1.87 Advanced Novice 1
798
+
799
+ # Perform intersection of two GeoDataFrames.
800
+ >>> geo_dataframe = GeoDataFrame('sample_shapes')
801
+ >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
802
+ >>> geo_dataframe1
803
+
804
+ skey linestrings
805
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
806
+ >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
807
+ >>> geo_dataframe2
808
+
809
+ skey linestrings
810
+ 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
811
+ 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
812
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
813
+ 1002 LINESTRING (1 3,3 0,0 1)
814
+ 1001 LINESTRING (1 1,2 2,3 3,4 4)
815
+ 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
816
+ 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
817
+ 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
818
+ 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
819
+ >>> td_intersect([geo_dataframe1,geo_dataframe2])
820
+
821
+ skey linestrings
822
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
823
+ """
824
+ awu_matrix = []
825
+ awu_matrix.append(["df_list", df_list, False, (list)])
826
+ awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
827
+ setop_type = 'td_intersect'
828
+ operation = 'intersect'
829
+
830
+ # Validate Set operator arguments
831
+ __validate_setop_args(df_list, awu_matrix, setop_type)
832
+
833
+ return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
834
+
835
+
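The allow_duplicates flag selects the set-operator variant passed to the AED node: 'intersectall' when duplicates are allowed, plain 'intersect' otherwise, mirroring SQL's INTERSECT ALL versus INTERSECT. A one-line sketch of that selection:

# Sketch of the operation-name selection used when building the AED node.
operation = 'intersect'
for allow_duplicates in (True, False):
    op_name = '{}all'.format(operation) if allow_duplicates else operation
    print(allow_duplicates, '->', op_name)
# True -> intersectall
# False -> intersect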
836
+ @collect_queryband(queryband="tdMinus")
837
+ def td_minus(df_list, allow_duplicates=True):
838
+ """
839
+ DESCRIPTION:
840
+ This function returns the resulting rows that appear in the first teradataml DataFrame or GeoDataFrame
841
+ and not in other teradataml DataFrames or GeoDataFrames along the index axis.
842
+ Note:
843
+ This function should be applied to data frames of the same type: either all teradataml DataFrames,
844
+ or all GeoDataFrames.
845
+
846
+ PARAMETERS:
847
+ df_list:
848
+ Required argument.
849
+ Specifies the list of teradataml DataFrames or GeoDataFrames on which the minus
850
+ operation is to be performed.
851
+ Types: list of teradataml DataFrames or GeoDataFrames
852
+
853
+ allow_duplicates:
854
+ Optional argument.
855
+ Specifies if the result of minus operation can have duplicate rows.
856
+ Default value: True
857
+ Types: bool
858
+
859
+ RETURNS:
860
+ teradataml DataFrame when operation is performed on teradataml DataFrames.
861
+ teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.
862
+
863
+ RAISES:
864
+ TeradataMlException, TypeError
865
+
866
+ EXAMPLES:
867
+ >>> from teradataml import load_example_data
868
+ >>> load_example_data("dataframe", "setop_test1")
869
+ >>> load_example_data("dataframe", "setop_test2")
870
+ >>> load_example_data("geodataframe", ["sample_shapes"])
871
+ >>> from teradataml.dataframe.setop import td_minus
872
+ >>>
873
+ >>> df1 = DataFrame('setop_test1')
874
+ >>> df1
875
+ masters gpa stats programming admitted
876
+ id
877
+ 62 no 3.70 Advanced Advanced 1
878
+ 53 yes 3.50 Beginner Novice 1
879
+ 69 no 3.96 Advanced Advanced 1
880
+ 61 yes 4.00 Advanced Advanced 1
881
+ 58 no 3.13 Advanced Advanced 1
882
+ 51 yes 3.76 Beginner Beginner 0
883
+ 68 no 1.87 Advanced Novice 1
884
+ 66 no 3.87 Novice Beginner 1
885
+ 60 no 4.00 Advanced Novice 1
886
+ 59 no 3.65 Novice Novice 1
887
+ >>> df2 = DataFrame('setop_test2')
888
+ >>> df2
889
+ masters gpa stats programming admitted
890
+ id
891
+ 12 no 3.65 Novice Novice 1
892
+ 15 yes 4.00 Advanced Advanced 1
893
+ 14 yes 3.45 Advanced Advanced 0
894
+ 20 yes 3.90 Advanced Advanced 1
895
+ 18 yes 3.81 Advanced Advanced 1
896
+ 17 no 3.83 Advanced Advanced 1
897
+ 13 no 4.00 Advanced Novice 1
898
+ 11 no 3.13 Advanced Advanced 1
899
+ 60 no 4.00 Advanced Novice 1
900
+ 19 yes 1.98 Advanced Advanced 0
901
+ >>> idf = td_minus([df1[df1.id<55] , df2])
902
+ >>> idf
903
+ masters gpa stats programming admitted
904
+ id
905
+ 51 yes 3.76 Beginner Beginner 0
906
+ 50 yes 3.95 Beginner Beginner 0
907
+ 54 yes 3.50 Beginner Advanced 1
908
+ 52 no 3.70 Novice Beginner 1
909
+ 53 yes 3.50 Beginner Novice 1
910
+ 53 yes 3.50 Beginner Novice 1
911
+ >>>
912
+ >>> idf = td_minus([df1[df1.id<55] , df2], allow_duplicates=False)
913
+ >>> idf
914
+ masters gpa stats programming admitted
915
+ id
916
+ 54 yes 3.50 Beginner Advanced 1
917
+ 51 yes 3.76 Beginner Beginner 0
918
+ 53 yes 3.50 Beginner Novice 1
919
+ 50 yes 3.95 Beginner Beginner 0
920
+ 52 no 3.70 Novice Beginner 1
921
+ >>> # applying minus on more than two DataFrames
922
+ >>> df3 = df1[df1.gpa <= 3.9]
923
+ >>> idf = td_minus([df1, df2, df3])
924
+ >>> idf
925
+ masters gpa stats programming admitted
926
+ id
927
+ 61 yes 4.00 Advanced Advanced 1
928
+ 50 yes 3.95 Beginner Beginner 0
929
+ 69 no 3.96 Advanced Advanced 1
930
+
931
+ # td_minus on GeoDataFrame
932
+ >>> geo_dataframe = GeoDataFrame('sample_shapes')
933
+ >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
934
+ >>> geo_dataframe1
935
+ skey linestrings
936
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
937
+
938
+ >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
939
+ >>> geo_dataframe2
940
+ skey linestrings
941
+ 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
942
+ 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
943
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
944
+ 1002 LINESTRING (1 3,3 0,0 1)
945
+ 1001 LINESTRING (1 1,2 2,3 3,4 4)
946
+ 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
947
+ 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
948
+ 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
949
+ 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
950
+
951
+ >>> td_minus([geo_dataframe2,geo_dataframe1])
952
+ linestrings
953
+ skey
954
+ 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
955
+ 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
956
+ 1002 LINESTRING (1 3,3 0,0 1)
957
+ 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
958
+ 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
959
+ 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
960
+ 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
961
+ 1001 LINESTRING (1 1,2 2,3 3,4 4)
962
+ """
963
+ awu_matrix = []
964
+ awu_matrix.append(["df_list", df_list, False, (list)])
965
+ awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
966
+ # Stack index 2 (instead of 1 in the previous version) accounts for the extra
+ # frame added by the collect_queryband wrapper around this function.
+ setop_type = 'td_except' if inspect.stack()[2][3] == 'td_except' else 'td_minus'
967
+ operation = 'minus'
968
+
969
+ # Validate Set operator arguments
970
+ __validate_setop_args(df_list, awu_matrix, setop_type)
971
+
972
+ return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
973
+
974
+
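td_minus reports itself as 'td_except' when it was invoked from td_except, which it detects by reading the caller's function name off the interpreter stack. A standalone sketch of the caller-name idiom (undecorated here, so stack index 1 suffices):

import inspect

def callee():
    # Frame 0 is callee itself; frame 1 is its direct caller.
    caller_name = inspect.stack()[1][3]
    return 'td_except' if caller_name == 'td_except' else 'td_minus'

def td_except():
    return callee()

print(callee())     # td_minus (the caller here is the module, not td_except)
print(td_except())  # td_except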
975
+ @collect_queryband(queryband="tdExcept")
976
+ def td_except(df_list, allow_duplicates=True):
977
+ """
978
+ DESCRIPTION:
979
+ This function returns the resulting rows that appear in the first teradataml DataFrame or GeoDataFrame
980
+ and not in other teradataml DataFrames or GeoDataFrames along the index axis.
981
+ Note:
982
+ This function should be applied to data frames of the same type: either all teradataml DataFrames,
983
+ or all GeoDataFrames.
984
+
985
+ PARAMETERS:
986
+ df_list:
987
+ Required argument.
988
+ Specifies the list of teradataml DataFrames or GeoDataFrames on which the except
989
+ operation is to be performed.
990
+ Types: list of teradataml DataFrames or GeoDataFrames
991
+
992
+ allow_duplicates:
993
+ Optional argument.
994
+ Specifies if the result of except operation can have duplicate rows.
995
+ Default value: True
996
+ Types: bool
997
+
998
+ RETURNS:
999
+ teradataml DataFrame when operation is performed on teradataml DataFrames.
1000
+ teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.
1001
+
1002
+ RAISES:
1003
+ TeradataMlException, TypeError
1004
+
1005
+ EXAMPLES:
1006
+ >>> from teradataml import load_example_data
1007
+ >>> load_example_data("dataframe", "setop_test1")
1008
+ >>> load_example_data("dataframe", "setop_test2")
1009
+ >>> load_example_data("geodataframe", ["sample_shapes"])
1010
+ >>> from teradataml.dataframe.setop import td_except
1011
+ >>>
1012
+ >>> df1 = DataFrame('setop_test1')
1013
+ >>> df1
1014
+ masters gpa stats programming admitted
1015
+ id
1016
+ 62 no 3.70 Advanced Advanced 1
1017
+ 53 yes 3.50 Beginner Novice 1
1018
+ 69 no 3.96 Advanced Advanced 1
1019
+ 61 yes 4.00 Advanced Advanced 1
1020
+ 58 no 3.13 Advanced Advanced 1
1021
+ 51 yes 3.76 Beginner Beginner 0
1022
+ 68 no 1.87 Advanced Novice 1
1023
+ 66 no 3.87 Novice Beginner 1
1024
+ 60 no 4.00 Advanced Novice 1
1025
+ 59 no 3.65 Novice Novice 1
1026
+ >>> df2 = DataFrame('setop_test2')
1027
+ >>> df2
1028
+ masters gpa stats programming admitted
1029
+ id
1030
+ 12 no 3.65 Novice Novice 1
1031
+ 15 yes 4.00 Advanced Advanced 1
1032
+ 14 yes 3.45 Advanced Advanced 0
1033
+ 20 yes 3.90 Advanced Advanced 1
1034
+ 18 yes 3.81 Advanced Advanced 1
1035
+ 17 no 3.83 Advanced Advanced 1
1036
+ 13 no 4.00 Advanced Novice 1
1037
+ 11 no 3.13 Advanced Advanced 1
1038
+ 60 no 4.00 Advanced Novice 1
1039
+ 19 yes 1.98 Advanced Advanced 0
1040
+ >>> idf = td_except([df1[df1.id<55] , df2])
1041
+ >>> idf
1042
+ masters gpa stats programming admitted
1043
+ id
1044
+ 51 yes 3.76 Beginner Beginner 0
1045
+ 50 yes 3.95 Beginner Beginner 0
1046
+ 54 yes 3.50 Beginner Advanced 1
1047
+ 52 no 3.70 Novice Beginner 1
1048
+ 53 yes 3.50 Beginner Novice 1
1049
+ 53 yes 3.50 Beginner Novice 1
1050
+ >>>
1051
+ >>> idf = td_except([df1[df1.id<55] , df2], allow_duplicates=False)
1052
+ >>> idf
1053
+ masters gpa stats programming admitted
1054
+ id
1055
+ 54 yes 3.50 Beginner Advanced 1
1056
+ 51 yes 3.76 Beginner Beginner 0
1057
+ 53 yes 3.50 Beginner Novice 1
1058
+ 50 yes 3.95 Beginner Beginner 0
1059
+ 52 no 3.70 Novice Beginner 1
1060
+ >>> # applying except on more than two DataFrames
1061
+ >>> df3 = df1[df1.gpa <= 3.9]
1062
+ >>> idf = td_except([df1, df2, df3])
1063
+ >>> idf
1064
+ masters gpa stats programming admitted
1065
+ id
1066
+ 61 yes 4.00 Advanced Advanced 1
1067
+ 50 yes 3.95 Beginner Beginner 0
1068
+ 69 no 3.96 Advanced Advanced 1
1069
+
1070
+ # td_except on GeoDataFrames
1071
+ >>> geo_dataframe = GeoDataFrame('sample_shapes')
1072
+ >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
1073
+ >>> geo_dataframe1
1074
+ skey linestrings
1075
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
1076
+
1077
+ >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
1078
+ >>> geo_dataframe2
1079
+ skey linestrings
1080
+ 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
1081
+ 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
1082
+ 1004 LINESTRING (10 20 30,40 50 60,70 80 80)
1083
+ 1002 LINESTRING (1 3,3 0,0 1)
1084
+ 1001 LINESTRING (1 1,2 2,3 3,4 4)
1085
+ 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
1086
+ 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
1087
+ 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
1088
+ 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
1089
+
1090
+ >>> td_except([geo_dataframe2,geo_dataframe1])
1091
+ skey linestrings
1092
+ 1008 MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
1093
+ 1003 LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
1094
+ 1005 LINESTRING (1 3 6,3 0 6,6 0 1)
1095
+ 1006 LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
1096
+ 1009 MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
1097
+ 1001 LINESTRING (1 1,2 2,3 3,4 4)
1098
+ 1007 MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
1099
+ 1002 LINESTRING (1 3,3 0,0 1)
1100
+ """
1101
+ return td_minus(df_list, allow_duplicates)
1102
+
1103
+
1104
+ def __process_setop_operation(df_list, allow_duplicates, setop_type, operation):
1105
+ """
1106
+ DESCRIPTION:
1107
+ Internal function to process set operation and return the result DataFrame/GeoDataFrame.
1108
+
1109
+ PARAMETERS:
1110
+ df_list:
1111
+ Required argument.
1112
+ Specifies the list of teradataml DataFrames/GeoDataFrames on which the set
1113
+ operation is to be performed.
1114
+ Types: list of teradataml DataFrames
1115
+
1116
+ allow_duplicates:
1117
+ Optional argument.
1118
+ Specifies if the result of the set operation can have duplicate rows.
1119
+ Default value: True
1120
+ Types: bool
1121
+
1122
+ setop_type:
1123
+ Required argument.
1124
+ Specifies the set operation.
1125
+ Types: str
1126
+
1127
+ operation:
1128
+ Required argument.
1129
+ Specifies the set operation name.
1130
+ Types: str
1131
+
1132
+ RETURNS:
1133
+ teradataml DataFrame/GeoDataFrame
1134
+
1135
+ RAISES:
1136
+ TeradataMlException
1137
+
1138
+ EXAMPLES:
1139
+ >>> __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
1140
+
1141
+ """
1142
+
1143
+ # Check if set operation can be lazy or not
1144
+ is_lazy = __check_setop_if_lazy(df_list)
1145
+
1146
+ # Get the first DataFrame's metaexpr
1147
+ first_df_metaexpr = df_list[0]._metaexpr
1148
+
1149
+ try:
1150
+ aed_utils = AedUtils()
1151
+ input_table_columns = []
1152
+ for i in range(len(df_list)):
1153
+ col_list = []
1154
+ for j in range(len(df_list[i].columns)):
1155
+ col_list.append(UtilFuncs._process_for_teradata_keyword(df_list[i].columns[j]))
1156
+
1157
+ input_table_columns.append(','.join(col_list))
1158
+
1159
+ nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
1160
+ '{}all'.format(operation) if allow_duplicates else '{}'.format(operation),
1161
+ input_table_columns)
1162
+
1163
+ # Set the index_label to the columns in the first df's index_label if it is not None,
1164
+ # else set it to None i.e. no index_label.
1165
+ index_label = []
1166
+ index_to_use = None
1167
+ index_to_use = df_list[0]._index_label
1168
+
1169
+ if index_to_use is not None:
1170
+ index_label = index_to_use
1171
+
1172
+ class_name = df_list[0].__class__.__name__
1173
+ return __process_operation(first_df_metaexpr, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name)
1174
+
1175
+ except TeradataMlException:
1176
+ raise
1177
+ except Exception as err:
1178
+ raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
1179
+ MessageCodes.SETOP_FAILED) from err
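For the non-concat operators, each input DataFrame contributes a comma-joined projection of all of its columns, with reserved-word column names quoted via _process_for_teradata_keyword. A simplified sketch; the quoting helper and the reserved-word set below are stand-in assumptions, not the library's implementation:

# Simplified sketch of the input_table_columns construction above.
RESERVED = {'user', 'password', 'type'}  # illustrative subset only

def quote_if_keyword(name):
    # Stand-in for UtilFuncs._process_for_teradata_keyword.
    return '"{}"'.format(name) if name.lower() in RESERVED else name

df_columns = [['id', 'type', 'gpa'], ['id', 'type', 'gpa']]
input_table_columns = [','.join(quote_if_keyword(c) for c in cols)
                       for cols in df_columns]
print(input_table_columns)
# ['id,"type",gpa', 'id,"type",gpa']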