teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between those versions as published.

Potentially problematic release.


This version of teradataml might be problematic.

Files changed (432)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +238 -1
  4. teradataml/__init__.py +13 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/Transformations.py +4 -4
  7. teradataml/analytics/__init__.py +0 -2
  8. teradataml/analytics/analytic_function_executor.py +3 -0
  9. teradataml/analytics/json_parser/utils.py +13 -12
  10. teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
  12. teradataml/analytics/sqle/__init__.py +0 -13
  13. teradataml/analytics/utils.py +1 -0
  14. teradataml/analytics/valib.py +3 -0
  15. teradataml/automl/__init__.py +1628 -0
  16. teradataml/automl/custom_json_utils.py +1270 -0
  17. teradataml/automl/data_preparation.py +993 -0
  18. teradataml/automl/data_transformation.py +727 -0
  19. teradataml/automl/feature_engineering.py +1648 -0
  20. teradataml/automl/feature_exploration.py +547 -0
  21. teradataml/automl/model_evaluation.py +163 -0
  22. teradataml/automl/model_training.py +887 -0
  23. teradataml/catalog/__init__.py +0 -2
  24. teradataml/catalog/byom.py +49 -6
  25. teradataml/catalog/function_argument_mapper.py +0 -2
  26. teradataml/catalog/model_cataloging_utils.py +2 -1021
  27. teradataml/common/aed_utils.py +6 -2
  28. teradataml/common/constants.py +50 -58
  29. teradataml/common/deprecations.py +160 -0
  30. teradataml/common/garbagecollector.py +61 -104
  31. teradataml/common/messagecodes.py +27 -36
  32. teradataml/common/messages.py +11 -15
  33. teradataml/common/utils.py +205 -287
  34. teradataml/common/wrapper_utils.py +1 -110
  35. teradataml/context/context.py +150 -78
  36. teradataml/data/bank_churn.csv +10001 -0
  37. teradataml/data/bmi.csv +501 -0
  38. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
  40. teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
  42. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
  43. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
  44. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
  45. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
  46. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
  47. teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
  48. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
  49. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
  50. teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
  51. teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
  52. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
  53. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
  54. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
  55. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
  56. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
  57. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
  58. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
  59. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
  60. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
  61. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
  62. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
  63. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
  64. teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
  65. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
  66. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
  67. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
  68. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
  69. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
  70. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
  71. teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
  72. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
  73. teradataml/data/fish.csv +160 -0
  74. teradataml/data/glass_types.csv +215 -0
  75. teradataml/data/insurance.csv +1 -1
  76. teradataml/data/iris_data.csv +151 -0
  77. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
  78. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
  79. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
  80. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
  81. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
  82. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
  83. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
  84. teradataml/data/load_example_data.py +3 -0
  85. teradataml/data/multi_model_classification.csv +401 -0
  86. teradataml/data/multi_model_regression.csv +401 -0
  87. teradataml/data/openml_example.json +63 -0
  88. teradataml/data/scripts/deploy_script.py +65 -0
  89. teradataml/data/scripts/mapper.R +20 -0
  90. teradataml/data/scripts/sklearn/__init__.py +0 -0
  91. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  92. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  93. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  94. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  95. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  96. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  97. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  98. teradataml/data/templates/open_source_ml.json +9 -0
  99. teradataml/data/teradataml_example.json +73 -1
  100. teradataml/data/test_classification.csv +101 -0
  101. teradataml/data/test_prediction.csv +101 -0
  102. teradataml/data/test_regression.csv +101 -0
  103. teradataml/data/train_multiclass.csv +101 -0
  104. teradataml/data/train_regression.csv +101 -0
  105. teradataml/data/train_regression_multiple_labels.csv +101 -0
  106. teradataml/data/wine_data.csv +1600 -0
  107. teradataml/dataframe/copy_to.py +79 -13
  108. teradataml/dataframe/data_transfer.py +8 -0
  109. teradataml/dataframe/dataframe.py +910 -311
  110. teradataml/dataframe/dataframe_utils.py +102 -5
  111. teradataml/dataframe/fastload.py +11 -3
  112. teradataml/dataframe/setop.py +15 -2
  113. teradataml/dataframe/sql.py +3735 -77
  114. teradataml/dataframe/sql_function_parameters.py +56 -5
  115. teradataml/dataframe/vantage_function_types.py +45 -1
  116. teradataml/dataframe/window.py +30 -29
  117. teradataml/dbutils/dbutils.py +18 -1
  118. teradataml/geospatial/geodataframe.py +18 -7
  119. teradataml/geospatial/geodataframecolumn.py +5 -0
  120. teradataml/hyperparameter_tuner/optimizer.py +910 -120
  121. teradataml/hyperparameter_tuner/utils.py +131 -37
  122. teradataml/lib/aed_0_1.dll +0 -0
  123. teradataml/lib/libaed_0_1.dylib +0 -0
  124. teradataml/lib/libaed_0_1.so +0 -0
  125. teradataml/libaed_0_1.dylib +0 -0
  126. teradataml/libaed_0_1.so +0 -0
  127. teradataml/opensource/__init__.py +1 -0
  128. teradataml/opensource/sklearn/__init__.py +1 -0
  129. teradataml/opensource/sklearn/_class.py +255 -0
  130. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  131. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  132. teradataml/opensource/sklearn/constants.py +54 -0
  133. teradataml/options/__init__.py +3 -6
  134. teradataml/options/configure.py +21 -20
  135. teradataml/scriptmgmt/UserEnv.py +61 -5
  136. teradataml/scriptmgmt/lls_utils.py +135 -53
  137. teradataml/table_operators/Apply.py +38 -6
  138. teradataml/table_operators/Script.py +45 -308
  139. teradataml/table_operators/TableOperator.py +182 -591
  140. teradataml/table_operators/__init__.py +0 -1
  141. teradataml/table_operators/table_operator_util.py +32 -40
  142. teradataml/utils/validators.py +127 -3
  143. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
  144. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
  145. teradataml/analytics/mle/AdaBoost.py +0 -651
  146. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  147. teradataml/analytics/mle/Antiselect.py +0 -342
  148. teradataml/analytics/mle/Arima.py +0 -641
  149. teradataml/analytics/mle/ArimaPredict.py +0 -477
  150. teradataml/analytics/mle/Attribution.py +0 -1070
  151. teradataml/analytics/mle/Betweenness.py +0 -658
  152. teradataml/analytics/mle/Burst.py +0 -711
  153. teradataml/analytics/mle/CCM.py +0 -600
  154. teradataml/analytics/mle/CCMPrepare.py +0 -324
  155. teradataml/analytics/mle/CFilter.py +0 -460
  156. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  157. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  158. teradataml/analytics/mle/Closeness.py +0 -737
  159. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  160. teradataml/analytics/mle/Correlation.py +0 -477
  161. teradataml/analytics/mle/Correlation2.py +0 -573
  162. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  163. teradataml/analytics/mle/CoxPH.py +0 -556
  164. teradataml/analytics/mle/CoxSurvival.py +0 -478
  165. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  166. teradataml/analytics/mle/DTW.py +0 -623
  167. teradataml/analytics/mle/DWT.py +0 -564
  168. teradataml/analytics/mle/DWT2D.py +0 -599
  169. teradataml/analytics/mle/DecisionForest.py +0 -716
  170. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  171. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  172. teradataml/analytics/mle/DecisionTree.py +0 -830
  173. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  174. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  175. teradataml/analytics/mle/FMeasure.py +0 -402
  176. teradataml/analytics/mle/FPGrowth.py +0 -734
  177. teradataml/analytics/mle/FrequentPaths.py +0 -695
  178. teradataml/analytics/mle/GLM.py +0 -558
  179. teradataml/analytics/mle/GLML1L2.py +0 -547
  180. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  181. teradataml/analytics/mle/GLMPredict.py +0 -529
  182. teradataml/analytics/mle/HMMDecoder.py +0 -945
  183. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  184. teradataml/analytics/mle/HMMSupervised.py +0 -521
  185. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  186. teradataml/analytics/mle/Histogram.py +0 -561
  187. teradataml/analytics/mle/IDWT.py +0 -476
  188. teradataml/analytics/mle/IDWT2D.py +0 -493
  189. teradataml/analytics/mle/IdentityMatch.py +0 -763
  190. teradataml/analytics/mle/Interpolator.py +0 -918
  191. teradataml/analytics/mle/KMeans.py +0 -485
  192. teradataml/analytics/mle/KNN.py +0 -627
  193. teradataml/analytics/mle/KNNRecommender.py +0 -488
  194. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  195. teradataml/analytics/mle/LAR.py +0 -439
  196. teradataml/analytics/mle/LARPredict.py +0 -478
  197. teradataml/analytics/mle/LDA.py +0 -548
  198. teradataml/analytics/mle/LDAInference.py +0 -492
  199. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  200. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  201. teradataml/analytics/mle/LinReg.py +0 -433
  202. teradataml/analytics/mle/LinRegPredict.py +0 -438
  203. teradataml/analytics/mle/MinHash.py +0 -544
  204. teradataml/analytics/mle/Modularity.py +0 -587
  205. teradataml/analytics/mle/NEREvaluator.py +0 -410
  206. teradataml/analytics/mle/NERExtractor.py +0 -595
  207. teradataml/analytics/mle/NERTrainer.py +0 -458
  208. teradataml/analytics/mle/NGrams.py +0 -570
  209. teradataml/analytics/mle/NPath.py +0 -634
  210. teradataml/analytics/mle/NTree.py +0 -549
  211. teradataml/analytics/mle/NaiveBayes.py +0 -462
  212. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  213. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  214. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  215. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  216. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  217. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  218. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  219. teradataml/analytics/mle/POSTagger.py +0 -417
  220. teradataml/analytics/mle/Pack.py +0 -411
  221. teradataml/analytics/mle/PageRank.py +0 -535
  222. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  223. teradataml/analytics/mle/PathGenerator.py +0 -367
  224. teradataml/analytics/mle/PathStart.py +0 -464
  225. teradataml/analytics/mle/PathSummarizer.py +0 -470
  226. teradataml/analytics/mle/Pivot.py +0 -471
  227. teradataml/analytics/mle/ROC.py +0 -425
  228. teradataml/analytics/mle/RandomSample.py +0 -637
  229. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  230. teradataml/analytics/mle/SAX.py +0 -779
  231. teradataml/analytics/mle/SVMDense.py +0 -677
  232. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  233. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  234. teradataml/analytics/mle/SVMSparse.py +0 -557
  235. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  236. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  237. teradataml/analytics/mle/Sampling.py +0 -549
  238. teradataml/analytics/mle/Scale.py +0 -565
  239. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  240. teradataml/analytics/mle/ScaleMap.py +0 -378
  241. teradataml/analytics/mle/ScaleSummary.py +0 -320
  242. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  243. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  244. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  245. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  246. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  247. teradataml/analytics/mle/Sessionize.py +0 -475
  248. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  249. teradataml/analytics/mle/StringSimilarity.py +0 -425
  250. teradataml/analytics/mle/TF.py +0 -389
  251. teradataml/analytics/mle/TFIDF.py +0 -504
  252. teradataml/analytics/mle/TextChunker.py +0 -414
  253. teradataml/analytics/mle/TextClassifier.py +0 -399
  254. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  255. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  256. teradataml/analytics/mle/TextMorph.py +0 -494
  257. teradataml/analytics/mle/TextParser.py +0 -623
  258. teradataml/analytics/mle/TextTagger.py +0 -530
  259. teradataml/analytics/mle/TextTokenizer.py +0 -502
  260. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  261. teradataml/analytics/mle/Unpack.py +0 -526
  262. teradataml/analytics/mle/Unpivot.py +0 -438
  263. teradataml/analytics/mle/VarMax.py +0 -776
  264. teradataml/analytics/mle/VectorDistance.py +0 -762
  265. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  266. teradataml/analytics/mle/XGBoost.py +0 -842
  267. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  268. teradataml/analytics/mle/__init__.py +0 -123
  269. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  270. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  271. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  272. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  273. teradataml/analytics/mle/json/arima_mle.json +0 -172
  274. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  275. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  276. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  277. teradataml/analytics/mle/json/burst_mle.json +0 -140
  278. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  279. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  280. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  281. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  282. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  283. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  284. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  285. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  286. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  287. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  288. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  289. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  290. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  291. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  292. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  293. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  294. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  295. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  296. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  297. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  298. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  299. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  300. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  301. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  302. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  303. teradataml/analytics/mle/json/glm_mle.json +0 -111
  304. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  305. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  306. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  307. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  308. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  309. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  310. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  311. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  312. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  313. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  314. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  315. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  316. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  317. teradataml/analytics/mle/json/knn_mle.json +0 -141
  318. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  319. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  320. teradataml/analytics/mle/json/lar_mle.json +0 -78
  321. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  322. teradataml/analytics/mle/json/lda_mle.json +0 -130
  323. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  324. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  325. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  326. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  327. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  328. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  329. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  330. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  331. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  332. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  333. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  334. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  335. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  336. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  337. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  338. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  339. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  340. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  341. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  342. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  343. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  344. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  345. teradataml/analytics/mle/json/pack_mle.json +0 -58
  346. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  347. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  348. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  349. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  350. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  351. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  352. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  353. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  354. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  355. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  356. teradataml/analytics/mle/json/roc_mle.json +0 -73
  357. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  358. teradataml/analytics/mle/json/sax_mle.json +0 -154
  359. teradataml/analytics/mle/json/scale_mle.json +0 -93
  360. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  361. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  362. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  363. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  364. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  365. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  366. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  367. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  368. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  369. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  370. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  371. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  372. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  373. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  374. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  375. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  376. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  377. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  378. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  379. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  380. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  381. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  382. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  383. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  384. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  385. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  386. teradataml/analytics/mle/json/tf_mle.json +0 -33
  387. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  388. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  389. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  390. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  391. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  392. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  393. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  394. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  395. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  396. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  397. teradataml/analytics/sqle/Antiselect.py +0 -321
  398. teradataml/analytics/sqle/Attribution.py +0 -603
  399. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  400. teradataml/analytics/sqle/GLMPredict.py +0 -430
  401. teradataml/analytics/sqle/MovingAverage.py +0 -543
  402. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  403. teradataml/analytics/sqle/NPath.py +0 -632
  404. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  405. teradataml/analytics/sqle/Pack.py +0 -388
  406. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  407. teradataml/analytics/sqle/Sessionize.py +0 -390
  408. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  409. teradataml/analytics/sqle/Unpack.py +0 -503
  410. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  411. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  412. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  413. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  414. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  415. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  416. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  417. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  418. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  419. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  420. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  421. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  422. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  423. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  424. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  425. teradataml/catalog/model_cataloging.py +0 -980
  426. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  427. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  428. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  429. teradataml/table_operators/sandbox_container_util.py +0 -643
  430. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
  431. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
  432. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
@@ -29,12 +29,12 @@ from teradataml.dataframe.sql_interfaces import ColumnExpression
  from teradataml.dataframe.sql_functions import case
  from teradataml.series.series import Series
  from teradatasqlalchemy.types import _TDType, BIGINT, INTEGER, PERIOD_TIMESTAMP, SMALLINT, BYTEINT, FLOAT, DECIMAL
- from teradataml.common.utils import UtilFuncs, argument_deprecation
+ from teradataml.common.utils import UtilFuncs
  from teradataml.common.exceptions import TeradataMlException
  from teradataml.common.messages import Messages
  from teradataml.common.messagecodes import MessageCodes
  from teradataml.common.constants import AEDConstants
- from teradataml.common.constants import SourceType, PythonTypes, TeradataConstants,\
+ from teradataml.common.constants import SourceType, PythonTypes, TeradataConstants, \
  TeradataTypes, PTITableConstants, TableOperatorConstants, SQLFunctionConstants
  from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils, DataFrameUtils
  from teradataml.dataframe.indexer import _LocationIndexer
@@ -52,9 +52,10 @@ from teradatasql import OperationalError
  from teradataml.dataframe.window import Window
  from teradataml.dataframe.data_transfer import _DataTransferUtils
  from teradataml.common.bulk_exposed_utils import _validate_unimplemented_function
+ from teradatasqlalchemy.telemetry.queryband import collect_queryband

- #TODO use logger when available on master branch
- #logger = teradatapylog.getLogger()
+ # TODO use logger when available on master branch
+ # logger = teradatapylog.getLogger()

  in_schema = UtilFuncs._in_schema

@@ -215,7 +216,7 @@ class DataFrame():
  raise ValueError(Messages.get_message(
  MessageCodes.FROM_QUERY_SELECT_SUPPORTED).format("Check the syntax."))
  raise ValueError(Messages.get_message(
- MessageCodes.FROM_QUERY_SELECT_SUPPORTED))
+ MessageCodes.FROM_QUERY_SELECT_SUPPORTED))

  self._nodeid = self._aed_utils._aed_query(self._query, temp_table_name)
  else:
@@ -229,6 +230,8 @@ class DataFrame():

  self._loc = _LocationIndexer(self)
  self._iloc = _LocationIndexer(self, integer_indexing=True)
+ self.__data = None
+ self.__data_columns = None

  except TeradataMlException:
  raise
@@ -239,6 +242,7 @@ class DataFrame():
  MessageCodes.TDMLDF_CREATE_FAIL) from err

  @classmethod
+ @collect_queryband(queryband="DF_fromTable")
  def from_table(cls, table_name, index=True, index_label=None):
  """
  Class method for creating a DataFrame from a table or a view.
@@ -295,6 +299,7 @@ class DataFrame():
  return cls(table_name, index, index_label)

  @classmethod
+ @collect_queryband(queryband="DF_fromQuery")
  def from_query(cls, query, index=True, index_label=None, materialize=False):
  """
  Class method for creating a DataFrame from a query.
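Throughout this file, public DataFrame entry points pick up a @collect_queryband(...) decorator (imported above from teradatasqlalchemy.telemetry.queryband), which tags each API call with a name such as DF_fromTable so the generated SQL can be attributed via the session query band. The snippet below is only an illustrative sketch of that decorator pattern; it is not the teradatasqlalchemy implementation, and the helper name and the SET QUERY_BAND statement mentioned in the comment are assumptions.

    import functools

    def collect_queryband_sketch(queryband):
        """Hypothetical stand-in for teradatasqlalchemy's collect_queryband decorator."""
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                # A real implementation would set something like
                #   SET QUERY_BAND = 'APPFUNC=DF_fromTable;' FOR SESSION
                # on the connection before delegating to the wrapped teradataml call.
                print("query band entry: APPFUNC={}".format(queryband))
                return func(*args, **kwargs)
            return wrapper
        return decorator

    @collect_queryband_sketch(queryband="DF_fromTable")
    def from_table_stub(table_name):
        return "DataFrame({!r})".format(table_name)

    print(from_table_stub("sales"))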
@@ -407,12 +412,170 @@ class DataFrame():
  if undropped_index is not None and all(elem in [col.name for col in metaexpr.c] for elem in undropped_index):
  df._undropped_index = undropped_index
  elif undropped_index is not None and all(UtilFuncs._teradata_quote_arg(elem, "\"", False)
- in [col.name for col in metaexpr.c] for elem in undropped_index):
- df._undropped_index = undropped_index
+ in [col.name for col in metaexpr.c] for elem in undropped_index):
+ df._undropped_index = undropped_index

  return df

- def __execute_node_and_set_table_name(self, nodeid, metaexpr = None):
+ @collect_queryband(queryband="DF_fillna")
+ def fillna(self, value=None, columns=None, literal_value=False):
+ """
+ Method to replace the null values in a column with the value specified.
+
+ PARAMETERS:
+ value:
+ Required Argument.
+ Specifies the value(s) to replace the null values with. If value is a dict
+ then "columns" is ignored.
+ Note:
+ * To use pre-defined strings to replace the null value set "literal_value" to True.
+ Permitted Values:
+ * Pre-defined strings:
+ * 'MEAN' - Replace null value with the average of the values in the column.
+ * 'MODE' - Replace null value with the mode of the values in the column.
+ * 'MEDIAN' - Replace null value with the median of the values in the column.
+ * 'MIN' - Replace null value with the minimum of the values in the column.
+ * 'MAX' - Replace null value with the maximum of the values in the column.
+ Types: int, float, str, dict containing column names and value, list
+
+ columns:
+ Optional Argument.
+ Specifies the column names to perform the null value replacement. If "columns"
+ is None, then all the columns having null value and data type similar to
+ the data type of the value specified are considered.
+ Default Value: None
+ Types: str, tuple or list of str
+
+ literal_value:
+ Optional Argument.
+ Specifies whether the pre-defined strings passed to "value" should be treated
+ as literal or not.
+ Default Value: False
+ Types: bool
+
+ RETURNS:
+ teradataml DataFrame
+
+ RAISES:
+ TeradataMlException
+
+ EXAMPLES:
+ >>> load_example_data("dataframe", "sales")
+ >>> df = DataFrame("sales")
+ >>> df
+ Feb Jan Mar Apr datetime
+ accounts
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
+
+ # Example 1: Populate null value in column 'Jan' and 'Mar'
+ # with the value specified as dictionary.
+ >>> df.fillna({"Jan": 123, "Mar":234})
+ accounts Feb Jan Mar Apr datetime
+ 0 Blue Inc 90.0 50 95 101.0 17/01/04
+ 1 Alpha Co 210.0 200 215 250.0 17/01/04
+ 2 Jones LLC 200.0 150 140 180.0 17/01/04
+ 3 Yellow Inc 90.0 123 234 NaN 17/01/04
+ 4 Orange Inc 210.0 123 234 250.0 17/01/04
+ 5 Red Inc 200.0 150 140 NaN 17/01/04
+
+ # Example 2: Populate the null value in 'Jan' column
+ # with minimum value in that column.
+ >>> df.fillna("Min", "Jan")
+ accounts Feb Jan Mar Apr datetime
+ 0 Yellow Inc 90.0 50 NaN NaN 17/01/04
+ 1 Jones LLC 200.0 150 140.0 180.0 17/01/04
+ 2 Red Inc 200.0 150 140.0 NaN 17/01/04
+ 3 Blue Inc 90.0 50 95.0 101.0 17/01/04
+ 4 Alpha Co 210.0 200 215.0 250.0 17/01/04
+ 5 Orange Inc 210.0 50 NaN 250.0 17/01/04
+ """
+ from teradataml import SimpleImputeFit, SimpleImputeTransform
+
+ arg_info_matrix = []
+ arg_info_matrix.append(["value", value, True, (int, float, str, dict, list)])
+ arg_info_matrix.append(["columns", columns, True, (list, str, tuple)])
+ arg_info_matrix.append(["literal_value", literal_value, True, (bool)])
+
+ # Validate argument types
+ _Validators._validate_function_arguments(arg_info_matrix)
+
+ if isinstance(columns, tuple):
+ columns = list(columns)
+
+ # If dict is passed separate the values of 'columns' and 'value'
+ if isinstance(value, dict):
+ columns, value = zip(*value.items())
+ columns = [str(col) for col in columns]
+ value = [str(val) for val in value]
+
+ is_stats = False
+
+ for val in UtilFuncs._as_list(value):
+ if isinstance(val, str) and val.lower() in ["mean", "median", "mode", "min", "max"]:
+ is_stats = True
+ break
+
+ # If "literal_value" is set to False
+ if not literal_value and is_stats:
+ stats = []
+ stats_columns = []
+ literals = []
+ literals_columns = []
+ # If value is a list, extract columns and values, if values match to any
+ # predefined string then assign it to stats and column name to stats_column
+ # else treat it as a literal value and literal column.
+ if isinstance(value, list):
+ for val, col in zip(value, columns):
+ if isinstance(val, str) and val.lower() in ["mean", "median", "mode", "min", "max"]:
+ stats.append(val)
+ stats_columns.append(col)
+ else:
+ literals.append(str(val))
+ literals_columns.append(col)
+ else:
+ # In case it is not a list then simply assign it to 'stats' and 'stats_columns'
+ stats = value
+ stats_columns = columns
+
+ # In case no literal value found in the list and literal list is empty assign it as 'None'
+ # instead of empty list.
+ literals = None if not literals else literals
+ literals_columns = None if not literals_columns else literals_columns
+
+ else:
+ # If it is a literal value then 'stats' and 'stats_column' is not required
+ stats = None
+ stats_columns = None
+
+ # In case column is not specified by the user, then all the columns in that dataframe
+ # should be considered else the specified columns should be considered for 'literal_columns'
+ literals_columns = self.columns if (columns is None and value is not None) else columns
+ literals_columns = UtilFuncs._as_list(literals_columns)
+ # In case value is a list of single element, then multiply it as many times as
+ # number of columns ['12'] -> ['12','12', upto number of columns]
+ # else convert it to str and append it
+ if isinstance(value, list):
+ literals = []
+ for val in value:
+ literals.append(str(val))
+ else:
+ literals = UtilFuncs._as_list(str(value))
+ literals = literals * len(literals_columns) if len(literals) != len(literals_columns) else literals
+
+ fit_obj = SimpleImputeFit(data=self,
+ literals=literals,
+ literals_columns=literals_columns,
+ stats=stats,
+ stats_columns=stats_columns)
+
+ return fit_obj.transform(data=self).result
+
+ def __execute_node_and_set_table_name(self, nodeid, metaexpr=None):
  """
  Private method for executing node and setting _table_name,
  if not set already.
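The fillna method added above is a convenience wrapper over the SQLE SimpleImpute functions: literal replacement values are routed to the literals/literals_columns arguments of SimpleImputeFit and the pre-defined statistics ('MEAN', 'MEDIAN', 'MODE', 'MIN', 'MAX') to stats/stats_columns, after which the fitted object is applied with transform(). A minimal usage sketch, assuming an existing Vantage context and the "sales" demo table from the docstring:

    from teradataml import DataFrame, SimpleImputeFit, load_example_data

    load_example_data("dataframe", "sales")
    df = DataFrame("sales")

    # New convenience API in 20.0.0.0: fill nulls in 'Jan' with the column minimum.
    filled = df.fillna("min", columns="Jan")

    # Roughly the long form that fillna builds internally.
    fit_obj = SimpleImputeFit(data=df, stats="min", stats_columns="Jan")
    filled_long = fit_obj.transform(data=df).result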
@@ -632,14 +795,14 @@ class DataFrame():
  # get the ColumnExpression from the _MetaExpression
  if isinstance(key, str):
  return self.__getattr__(key)
-
+
  if isinstance(key, list):
  return self.select(key)

  if isinstance(key, ClauseElement):
  from teradataml.dataframe.sql import _SQLColumnExpression
  key = _SQLColumnExpression(key)
-
+
  # apply the filter expression
  if isinstance(key, ColumnExpression):

@@ -662,7 +825,7 @@ class DataFrame():
  raise

  except ValueError:
- raise
+ raise

  except Exception as err:
  errcode = MessageCodes.TDMLDF_INFO_ERROR
@@ -1073,6 +1236,7 @@ class DataFrame():
  td_metadata = [(column.name, repr(column.type)) for column in self._metaexpr.c]
  return MetaData(td_metadata)

+ @collect_queryband(queryband="DF_info")
  def info(self, verbose=True, buf=None, max_cols=None, null_counts=False):
  """
  DESCRIPTION:
@@ -1193,8 +1357,10 @@ class DataFrame():
  except TeradataMlException:
  raise
  except Exception as err:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR) from err
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR) from err

+ @collect_queryband(queryband="DF_head")
  def head(self, n=display.max_rows):
  """
  DESCRIPTION:
@@ -1291,8 +1457,10 @@ class DataFrame():
  except TeradataMlException:
  raise
  except Exception as err:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR) from err
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR) from err

+ @collect_queryband(queryband="DF_tail")
  def tail(self, n=display.max_rows):
  """
  DESCRIPTION:
@@ -1379,14 +1547,16 @@ class DataFrame():

  try:
  if self._metaexpr is None:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR)

  sort_col = self._get_sort_col()
  return df_utils._get_sorted_nrow(self, n, sort_col[0], asc=False)
  except TeradataMlException:
  raise
  except Exception as err:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR) from err
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR) from err

  def _get_axis(self, axis):
  """
@@ -1414,14 +1584,17 @@ class DataFrame():
  elif axis == "columns":
  return 1
  else:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS), MessageCodes.TDMLDF_INVALID_DROP_AXIS)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+ MessageCodes.TDMLDF_INVALID_DROP_AXIS)
  elif isinstance(axis, numbers.Integral):
  if axis in [0, 1]:
  return axis
  else:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS), MessageCodes.TDMLDF_INVALID_DROP_AXIS)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+ MessageCodes.TDMLDF_INVALID_DROP_AXIS)
  else:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS), MessageCodes.TDMLDF_INVALID_DROP_AXIS)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+ MessageCodes.TDMLDF_INVALID_DROP_AXIS)

  def _get_sort_col(self):
  """
@@ -1446,7 +1619,7 @@ class DataFrame():
  col_name = self._index_label[0]
  else:
  col_name = self._index_label
- else: #Use the first column from metadata
+ else: # Use the first column from metadata
  col_name = self.columns[0]

  col_type = PythonTypes.PY_NULL_TYPE.value
@@ -1455,16 +1628,20 @@ class DataFrame():
  col_type = py_type

  if col_type == PythonTypes.PY_NULL_TYPE.value:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR)

  sort_col_sqlalchemy_type = (self._metaexpr.t.c[col_name].type)
  # convert types to string from sqlalchemy type for the columns entered for sort
  sort_col_type = repr(sort_col_sqlalchemy_type).split("(")[0]
  if sort_col_type in unsupported_types:
- raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, sort_col_type, "ANY, except following {}".format(unsupported_types)), MessageCodes.UNSUPPORTED_DATATYPE)
+ raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, sort_col_type,
+ "ANY, except following {}".format(unsupported_types)),
+ MessageCodes.UNSUPPORTED_DATATYPE)

  return (col_name, col_type)

+ @collect_queryband(queryband="DF_drop")
  def drop(self, labels=None, axis=0, columns=None):
  """
  DESCRIPTION:
@@ -1585,7 +1762,7 @@ class DataFrame():
  index_labels = labels
  else:
  column_labels = labels
- else: # Columns is not None
+ else: # Columns is not None
  column_labels = columns

  if index_labels is not None:
@@ -1594,7 +1771,8 @@ class DataFrame():

  if isinstance(index_labels, list):
  if len(index_labels) == 0:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS), MessageCodes.TDMLDF_DROP_ARGS)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+ MessageCodes.TDMLDF_DROP_ARGS)

  if sort_col[1] == PythonTypes.PY_STRING_TYPE.value:
  index_labels = ["'{}'".format(x) for x in index_labels]
@@ -1606,25 +1784,29 @@ class DataFrame():
  index_expr = index_labels

  filter_expr = "{0} not in ({1})".format(sort_col[0], index_expr)
- new_nodeid= self._aed_utils._aed_filter(self._nodeid, filter_expr)
+ new_nodeid = self._aed_utils._aed_filter(self._nodeid, filter_expr)
  # Get the updated metaexpr
  new_metaexpr = UtilFuncs._get_metaexpr_using_parent_metaexpr(new_nodeid, self._metaexpr)
  return self._create_dataframe_from_node(new_nodeid, new_metaexpr, self._index_label)
- else: # Column labels
+ else: # Column labels
  select_cols = []
  cols = [x.name for x in self._metaexpr.columns]
  if isinstance(column_labels, list):
  if len(column_labels) == 0:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS), MessageCodes.TDMLDF_DROP_ARGS)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+ MessageCodes.TDMLDF_DROP_ARGS)

  if not all(isinstance(n, str) for n in column_labels):
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES), MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES),
+ MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES)
  drop_cols = [x for x in column_labels]
  elif isinstance(column_labels, (tuple, dict)):
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS), MessageCodes.TDMLDF_DROP_ARGS)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+ MessageCodes.TDMLDF_DROP_ARGS)
  else:
  if not isinstance(column_labels, str):
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES), MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES)
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES),
+ MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES)
  drop_cols = [column_labels]

  for drop_name in drop_cols:
@@ -1637,14 +1819,17 @@ class DataFrame():
  select_cols.append(colname)
  if len(select_cols) > 0:
  return self.select(select_cols)
- else: # no columns selected
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ALL_COLS), MessageCodes.TDMLDF_DROP_ALL_COLS)
+ else: # no columns selected
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ALL_COLS),
+ MessageCodes.TDMLDF_DROP_ALL_COLS)

  except TeradataMlException:
  raise
  except Exception as err:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR) from err
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR) from err

+ @collect_queryband(queryband="DF_dropna")
  def dropna(self, how='any', thresh=None, subset=None):
  """
  DESCRIPTION:
@@ -1755,10 +1940,10 @@ class DataFrame():
  filter_expr = "{0} >= {1}".format(fmt_filter, thresh)
  elif how == 'any':
  filter_expr = "{0} = {1}".format(fmt_filter, len(col_filters))
- else: # how == 'all'
+ else: # how == 'all'
  filter_expr = "{0} > 0".format(fmt_filter)

- new_nodeid= self._aed_utils._aed_filter(self._nodeid, filter_expr)
+ new_nodeid = self._aed_utils._aed_filter(self._nodeid, filter_expr)

  # Get the updated metaexpr
  new_metaexpr = UtilFuncs._get_metaexpr_using_parent_metaexpr(new_nodeid, self._metaexpr)
@@ -1766,8 +1951,10 @@ class DataFrame():
  except TeradataMlException:
  raise
  except Exception as err:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR) from err
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR) from err

+ @collect_queryband(queryband="DF_sort")
  def sort(self, columns, ascending=True):
  """
  DESCRIPTION:
@@ -1903,16 +2090,16 @@ class DataFrame():
  _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)

  try:
- orderexpr=""
- type_expr=[]
+ orderexpr = ""
+ type_expr = []
  invalid_types = []
  invalid_columns = []
  unsupported_types = ['BLOB', 'CLOB', 'ARRAY', 'VARRAY']

  if (isinstance(columns, str)):
- columns=[columns]
+ columns = [columns]
  if isinstance(ascending, bool):
- ascending=[ascending] * len(columns)
+ ascending = [ascending] * len(columns)

  # Validating lengths of passed arguments which are passed i.e. length of columns
  # must be same as ascending
@@ -1935,9 +2122,10 @@ class DataFrame():
  invalid_columns.append(column_name)
  invalid_types.append(col_type)
  if len(invalid_types) > 0:
- raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_DATATYPE, ", ".join(invalid_columns), 'columns',
- "Invalid", ", ".join(unsupported_types)),
- MessageCodes.UNSUPPORTED_DATATYPE)
+ raise TeradataMlException(
+ Messages.get_message(MessageCodes.INVALID_COLUMN_DATATYPE, ", ".join(invalid_columns), 'columns',
+ "Invalid", ", ".join(unsupported_types)),
+ MessageCodes.UNSUPPORTED_DATATYPE)

  orderexpr = []
  get_column_order = lambda asc: 'ASC' if asc else 'DESC'
@@ -1974,7 +2162,8 @@ class DataFrame():
  except TeradataMlException:
  raise

- def filter(self, items = None, like = None, regex = None, axis = 1, **kw):
+ @collect_queryband(queryband="DF_filter")
+ def filter(self, items=None, like=None, regex=None, axis=1, **kw):
  """
  DESCRIPTION:
  Filter rows or columns of dataframe according to labels in the specified index.
@@ -2239,12 +2428,12 @@ class DataFrame():
  op += 'regex'
  valid_value = type(regex) is str

- if op not in('items', 'like', 'regex'):
+ if op not in ('items', 'like', 'regex'):
  raise ValueError('Must use exactly one of the parameters items, like, and regex.')

  if not valid_value:
- msg = 'The "items" parameter must be list of strings or tuples of column labels/index values. ' +\
- 'The "regex" parameter and "like" parameter must be strings.'
+ msg = 'The "items" parameter must be list of strings or tuples of column labels/index values. ' + \
+ 'The "regex" parameter and "like" parameter must be strings.'
  raise TeradataMlException(msg, errcode)

  # validate multi index labels for items
@@ -2283,7 +2472,9 @@ class DataFrame():
  msg = Messages.get_message(errcode)
  raise TeradataMlException(msg, errcode)

- def describe(self, percentiles=[.25, .5, .75], include=None, verbose=False, distinct=False, statistics=None):
+ @collect_queryband(queryband="DF_describe")
+ def describe(self, percentiles=[.25, .5, .75], include=None, verbose=False, distinct=False, statistics=None,
+ columns=None):
  """
  DESCRIPTION:
  Generates statistics for numeric columns. This function can be used in two modes:
@@ -2355,6 +2546,13 @@ class DataFrame():
  Default Values: None
  Types: str or List of str

+ columns:
+ Optional Argument.
+ Specifies the name(s) of the columns we are collecting statistics for.
+ Default Values: None
+ Types: str or List of str
+
+
  RETURNS:
  teradataml DataFrame

@@ -2629,19 +2827,27 @@ class DataFrame():

  # Argument validations
  awu_matrix = []
+ awu_matrix.append(["columns", columns, True, (str, list), True])
  awu_matrix.append(["percentiles", percentiles, True, (float, list)])
  awu_matrix.append(["include", include, True, (str), True, [None, "all"]])
  awu_matrix.append(["verbose", verbose, True, (bool)])
  awu_matrix.append(["distinct", distinct, True, (bool)])
- awu_matrix.append(["statistics", statistics, True, (str, list), True, ["count", "mean", "min", "max", "unique", "std", "describe", "percentile"]])
+ awu_matrix.append(["statistics", statistics, True, (str, list), True,
+ ["count", "mean", "min", "max", "unique", "std", "describe", "percentile"]])

  # Validate argument types
  _Validators._validate_function_arguments(awu_matrix)

+ # Checking each element in passed columns to be valid column in dataframe
+ _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
+
  # Validate argument values.
  if isinstance(percentiles, float):
  percentiles = [percentiles]

+ if columns:
+ columns = UtilFuncs._as_list(columns)
+
  # Converting the statistics list to lower case
  if statistics:
  statistics = [stats.lower() for stats in UtilFuncs._as_list(statistics)]
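The describe() changes above add an optional "columns" argument: the column list is validated against the DataFrame metadata and passed through to _construct_describe_query, so statistics can be limited to a subset of columns instead of always covering every numeric column. A short usage sketch, assuming the same connected session and "sales" demo table as in the fillna example:

    from teradataml import DataFrame

    df = DataFrame("sales")

    # Existing behaviour: summary statistics for every numeric column.
    print(df.describe(percentiles=[0.25, 0.5, 0.75]))

    # New in 20.0.0.0: restrict the output to the listed columns.
    print(df.describe(columns=["Feb", "Jan"], statistics=["count", "mean", "max"]))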
@@ -2655,7 +2861,7 @@ class DataFrame():
  # Percentiles must be a list of values between 0 and 1.
  if not isinstance(percentiles, list) or not all(p > 0 and p < 1 for p in percentiles):
  raise ValueError(Messages.get_message(MessageCodes.INVALID_ARG_VALUE, percentiles, "percentiles",
- "percentiles must be a list of values between 0 and 1"))
+ "percentiles must be a list of values between 0 and 1"))

  # Argument 'include' with value 'all' is not allowed for DataFrameGroupByTime
  if include is not None and include.lower() == "all" and isinstance(self, DataFrameGroupByTime):
@@ -2679,11 +2885,16 @@ class DataFrame():
  groupby_column_list = None
  if isinstance(self, DataFrameGroupBy):
  groupby_column_list = self.groupby_column_list
+ df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
+ groupby_column_list=groupby_column_list)

  if isinstance(self, DataFrameGroupByTime):
  groupby_column_list = self.groupby_column_list
+ df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
+ groupby_column_list=groupby_column_list)
+
  # Construct the aggregate query.
- agg_query = df_utils._construct_describe_query(df=self, metaexpr=self._metaexpr,
+ agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
  percentiles=percentiles, function_label=function_label,
  groupby_column_list=groupby_column_list, include=include,
  is_time_series_aggregate=True, verbose=verbose,
@@ -2695,7 +2906,7 @@ class DataFrame():
  fill=self._fill)
  else:
  # Construct the aggregate query.
- agg_query = df_utils._construct_describe_query(df=self, metaexpr=self._metaexpr,
+ agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
  percentiles=percentiles, function_label=function_label,
  groupby_column_list=groupby_column_list, include=include,
  is_time_series_aggregate=False, verbose=verbose,
@@ -2710,7 +2921,7 @@ class DataFrame():
  describe_df = df2
  else:
  describe_df = DataFrame.from_query(agg_query, index_label=function_label)
-
+
  # Check if numeric overflow can occur for result DataFrame.
  if self._check_numeric_overflow(describe_df):
  result_df = self._promote_dataframe_types()
@@ -2719,8 +2930,10 @@ class DataFrame():
  except TeradataMlException:
  raise
  except Exception as err:
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR) from err
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+ MessageCodes.TDMLDF_INFO_ERROR) from err

+ @collect_queryband(queryband="DF_kurtosis")
  def kurtosis(self, distinct=False):
  """
  DESCRIPTION:
@@ -2863,6 +3076,7 @@ class DataFrame():

  return self._get_dataframe_aggregate(operation='kurtosis', distinct=distinct)

+ @collect_queryband(queryband="DF_min")
  def min(self, distinct=False):
  """
  DESCRIPTION:
@@ -2993,6 +3207,7 @@ class DataFrame():

  return self._get_dataframe_aggregate(operation='min', distinct=distinct)

+ @collect_queryband(queryband="DF_max")
  def max(self, distinct=False):
  """
  DESCRIPTION:
@@ -3124,6 +3339,7 @@ class DataFrame():

  return self._get_dataframe_aggregate(operation='max', distinct=distinct)

+ @collect_queryband(queryband="DF_mean")
  def mean(self, distinct=False):
  """
  DESCRIPTION:
@@ -3247,8 +3463,9 @@ class DataFrame():
  # Validate argument types
  _Validators._validate_function_arguments(awu_matrix)

- return self._get_dataframe_aggregate(operation='mean', distinct = distinct)
+ return self._get_dataframe_aggregate(operation='mean', distinct=distinct)

+ @collect_queryband(queryband="DF_skew")
  def skew(self, distinct=False):
  """
  DESCRIPTION:
@@ -3388,6 +3605,7 @@ class DataFrame():

  return self._get_dataframe_aggregate(operation='skew', distinct=distinct)

+ @collect_queryband(queryband="DF_sum")
  def sum(self, distinct=False):
  """
  DESCRIPTION:
@@ -3513,6 +3731,7 @@ class DataFrame():

  return self._get_dataframe_aggregate(operation='sum', distinct=distinct)

+ @collect_queryband(queryband="DF_count")
  def count(self, distinct=False):
  """
  DESCRIPTION:
@@ -3635,6 +3854,7 @@ class DataFrame():
  _Validators._validate_function_arguments(awu_matrix)
  return self._get_dataframe_aggregate(operation='count', distinct=distinct)

+ @collect_queryband(queryband="DF_csum")
  def csum(self, sort_columns, drop_columns=False):
  """
  DESCRIPTION:
@@ -3738,6 +3958,7 @@ class DataFrame():
  self._validate_window_aggregates_arguments(sort_columns)
  return self._get_dataframe_aggregate(operation='csum', sort_columns=sort_columns, drop_columns=drop_columns)

+ @collect_queryband(queryband="DF_msum")
  def msum(self, width, sort_columns, drop_columns=False):
  """
  DESCRIPTION:
@@ -3850,6 +4071,7 @@ class DataFrame():
  return self._get_dataframe_aggregate(
  operation='msum', width=width, sort_columns=sort_columns, drop_columns=drop_columns)

+ @collect_queryband(queryband="DF_mavg")
  def mavg(self, width, sort_columns, drop_columns=False):
  """
  DESCRIPTION:
@@ -3963,6 +4185,7 @@ class DataFrame():
  return self._get_dataframe_aggregate(
  operation='mavg', width=width, sort_columns=sort_columns, drop_columns=drop_columns)

+ @collect_queryband(queryband="DF_mdiff")
  def mdiff(self, width, sort_columns, drop_columns=False):
  """
  DESCRIPTION:
@@ -4076,6 +4299,7 @@ class DataFrame():
  return self._get_dataframe_aggregate(
  operation='mdiff', width=width, sort_columns=sort_columns, drop_columns=drop_columns)

+ @collect_queryband(queryband="DF_mlinreg")
  def mlinreg(self, width, sort_column, drop_columns=False):
  """
  DESCRIPTION:
@@ -4266,6 +4490,7 @@ class DataFrame():
  _Validators._validate_unexpected_column_type(
  self, sort_columns, sort_columns_arg_name, _Dtypes._get_sort_unsupported_data_types())

+ @collect_queryband(queryband="DF_std")
  def std(self, distinct=False, population=False):
4270
4495
  """
4271
4496
  DESCRIPTION:
@@ -4441,6 +4666,7 @@ class DataFrame():
4441
4666
  _Validators._validate_function_arguments(awu_matrix)
4442
4667
  return self._get_dataframe_aggregate(operation='std', distinct=distinct, population=population)
4443
4668
 
4669
+ @collect_queryband(queryband="DF_median")
4444
4670
  def median(self, distinct=False):
4445
4671
  """
4446
4672
  DESCRIPTION:
@@ -4566,12 +4792,14 @@ class DataFrame():
4566
4792
  _Validators._validate_function_arguments(awu_matrix)
4567
4793
 
4568
4794
  if distinct and not isinstance(self, DataFrameGroupByTime):
4569
- raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format('distinct', 'Aggregation',
4570
- 'True', 'median()',
4571
- 'DataFrameGroupByTime'))
4795
+ raise ValueError(
4796
+ Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format('distinct', 'Aggregation',
4797
+ 'True', 'median()',
4798
+ 'DataFrameGroupByTime'))
4572
4799
 
4573
- return self._get_dataframe_aggregate(operation = 'median', distinct = distinct)
4800
+ return self._get_dataframe_aggregate(operation='median', distinct=distinct)
4574
4801
 
4802
+ @collect_queryband(queryband="DF_var")
4575
4803
  def var(self, distinct=False, population=False):
4576
4804
  """
4577
4805
  DESCRIPTION:
@@ -4773,7 +5001,8 @@ class DataFrame():
4773
5001
 
4774
5002
  return self._get_dataframe_aggregate(operation='var', distinct=distinct, population=population)
4775
5003
 
4776
- def agg(self, func = None):
5004
+ @collect_queryband(queryband="DF_agg")
5005
+ def agg(self, func=None):
4777
5006
  """
4778
5007
  DESCRIPTION:
4779
5008
  Perform aggregates using one or more operations.
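
As the argument check further down enforces, "func" may be a single operation name, a list of names, or a dictionary mapping column names to one or more operations. Illustrative calls, with hypothetical column names "gpa" and "stars":

    >>> df.agg('min')
    >>> df.agg(['min', 'max'])
    >>> df.agg({'gpa': ['mean', 'max'], 'stars': 'sum'})
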
@@ -4918,10 +5147,12 @@ class DataFrame():
4918
5147
 
4919
5148
  if not isinstance(func, str) and not isinstance(func, list) and not isinstance(func, dict):
4920
5149
  raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
4921
- 'func', ['str', 'list', 'dict']), MessageCodes.UNSUPPORTED_DATATYPE)
5150
+ 'func', ['str', 'list', 'dict']),
5151
+ MessageCodes.UNSUPPORTED_DATATYPE)
4922
5152
 
4923
5153
  return self._get_dataframe_aggregate(func)
4924
5154
 
5155
+ @collect_queryband(arg_name="operation", prefix="DF")
4925
5156
  def _get_dataframe_aggregate(self, operation, **kwargs):
4926
5157
  """
4927
5158
  Returns the DataFrame given the aggregate operation or list of
@@ -5048,15 +5279,17 @@ class DataFrame():
5048
5279
  # Return Empty DataFrame if all the columns are selected in groupby as parent has
5049
5280
  if len(col_names) == 0:
5050
5281
  aggregate_expression, new_column_names, new_column_types = \
5051
- df_utils._construct_sql_expression_for_aggregations(self,
5052
- groupby_col_names, groupby_col_types, operation,
5053
- as_time_series_aggregate = is_time_series_aggregate, **kwargs)
5282
+ df_utils._construct_sql_expression_for_aggregations(self,
5283
+ groupby_col_names, groupby_col_types, operation,
5284
+ as_time_series_aggregate=is_time_series_aggregate,
5285
+ **kwargs)
5054
5286
  self._index_label = new_column_names
5055
5287
  else:
5056
5288
  aggregate_expression, new_column_names, new_column_types = \
5057
- df_utils._construct_sql_expression_for_aggregations(self,
5058
- col_names, col_types, operation, as_time_series_aggregate = is_time_series_aggregate,
5059
- **kwargs)
5289
+ df_utils._construct_sql_expression_for_aggregations(self,
5290
+ col_names, col_types, operation,
5291
+ as_time_series_aggregate=is_time_series_aggregate,
5292
+ **kwargs)
5060
5293
  new_column_names = pti_default_cols_proj + groupby_col_names + new_column_names
5061
5294
  new_column_types = pti_default_cols_types + groupby_col_types + new_column_types
5062
5295
 
@@ -5069,7 +5302,7 @@ class DataFrame():
5069
5302
  new_metaexpr = UtilFuncs._get_metaexpr_using_columns(aggregate_node_id,
5070
5303
  zip(new_column_names,
5071
5304
  new_column_types))
5072
- agg_df = self._create_dataframe_from_node\
5305
+ agg_df = self._create_dataframe_from_node \
5073
5306
  (aggregate_node_id, new_metaexpr, self._index_label)
5074
5307
 
5075
5308
  if (operation in ["sum", "csum", "mean"] and self._check_numeric_overflow(agg_df)):
@@ -5081,7 +5314,7 @@ class DataFrame():
5081
5314
  drop_columns=kwargs.get("drop_columns"))
5082
5315
  else:
5083
5316
  agg_df = getattr(promoted_df, operation)(distinct=kwargs.get("distinct"))
5084
-
5317
+
5085
5318
  return agg_df
5086
5319
 
5087
5320
  except TeradataMlException:
@@ -5089,7 +5322,7 @@ class DataFrame():
5089
5322
  except Exception as err:
5090
5323
  raise TeradataMlException(Messages.get_message(
5091
5324
  MessageCodes.EXECUTION_FAILED, "perform {} on DataFrame".format(operation), str(err)),
5092
- MessageCodes.EXECUTION_FAILED) from err
5325
+ MessageCodes.EXECUTION_FAILED) from err
5093
5326
 
5094
5327
  def _check_numeric_overflow(self, result_df):
5095
5328
  """
@@ -5110,7 +5343,7 @@ class DataFrame():
5110
5343
 
5111
5344
  EXAMPLES:
5112
5345
  result = self._check_numeric_overflow(agg_df)
5113
- """
5346
+ """
5114
5347
  try:
5115
5348
  repr(result_df)
5116
5349
  return False
@@ -5119,7 +5352,7 @@ class DataFrame():
5119
5352
  return True
5120
5353
  else:
5121
5354
  raise tme
5122
-
5355
+
5123
5356
  def _promote_dataframe_types(self):
5124
5357
  """
5125
5358
  Function promotes numeric column type to higher type to avoid numeric overflow
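
This helper pairs with _check_numeric_overflow(): when materializing an aggregate result raises a numeric-overflow error, the numeric columns are cast to a wider type and the aggregate is re-run. A standalone restatement of the promotion step, using a simplified type ladder (the actual mapping used by teradataml may cover more types):

    from teradatasqlalchemy.types import BYTEINT, SMALLINT, INTEGER, BIGINT, FLOAT

    # Simplified ladder: each numeric type promotes to the next wider one.
    NEXT_TYPE = {BYTEINT: SMALLINT, SMALLINT: INTEGER, INTEGER: BIGINT, BIGINT: FLOAT}

    def promoted_columns(df):
        # Map every column either to itself or to a cast() to the next wider type.
        new_cols = {}
        for col in df.columns:
            current = df[col].type.__class__
            if current in NEXT_TYPE:
                new_cols[col] = df[col].cast(type_=NEXT_TYPE[current])
            else:
                new_cols[col] = df[col]
        return new_cols
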
@@ -5152,9 +5385,8 @@ class DataFrame():
5152
5385
  new_cols[col] = self[col].cast(type_=next_type[self[col].type.__class__])
5153
5386
  else:
5154
5387
  new_cols[col] = self[col]
5155
-
5388
+
5156
5389
  return self.assign(True, **new_cols)
5157
-
5158
5390
 
5159
5391
  def __repr__(self):
5160
5392
  """
@@ -5179,16 +5411,7 @@ class DataFrame():
5179
5411
  """
5180
5412
  try:
5181
5413
 
5182
- # Generate/Execute AED nodes
5183
- self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
5184
-
5185
- query = repr(self._metaexpr) + ' FROM ' + self._table_name
5186
-
5187
- if self._orderby is not None:
5188
- query += ' ORDER BY ' + self._orderby
5189
-
5190
- # Execute the query and get the results in a list and create a Pandas DataFrame from the same.
5191
- data, columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
5414
+ data, columns = self.__get_data_columns()
5192
5415
  pandas_df = pd.DataFrame.from_records(data, columns=columns, coerce_float=True)
5193
5416
 
5194
5417
  if self._index_label:
@@ -5196,7 +5419,7 @@ class DataFrame():
5196
5419
 
5197
5420
  if self._undropped_index is not None:
5198
5421
  for col in self._undropped_index:
5199
- pandas_df.insert(0, col, pandas_df.index.get_level_values(col).tolist(), allow_duplicates = True)
5422
+ pandas_df.insert(0, col, pandas_df.index.get_level_values(col).tolist(), allow_duplicates=True)
5200
5423
 
5201
5424
  return pandas_df.to_string()
5202
5425
 
@@ -5209,18 +5432,15 @@ class DataFrame():
5209
5432
 
5210
5433
  def _repr_html_(self):
5211
5434
  """ Print method for teradataml for iPython rich display. """
5212
- # Generate/Execute AED nodes
5213
- self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
5214
-
5215
- query = repr(self._metaexpr) + ' FROM ' + self._table_name
5216
5435
 
5217
- if self._orderby is not None:
5218
- query += ' ORDER BY ' + self._orderby
5219
-
5220
- # Execute the query and get the results in a list.
5221
- data, columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
5436
+ # Check if class attributes __data and __data_columns are not None.
5437
+ # If not None, reuse the data and columns.
5438
+ # If None, generate latest results.
5439
+ if self.__data is None and self.__data_columns is None:
5440
+ self.__get_data_columns()
5222
5441
 
5223
- # Generate the HTML content from data and columns.
5442
+ # Generate the HTML content from the class attributes __data and __data_columns
5443
+ # which are updated by the __repr__() call, which always executes before _repr_html_().
5224
5444
  indent = "\t"
5225
5445
  dindent = indent + indent
5226
5446
 
@@ -5233,11 +5453,11 @@ class DataFrame():
5233
5453
  html = "\n{0}".format(indent).join(header_html)
5234
5454
  html += '<html><table>\n{0}<tr id="HeaderRow">\n'.format(indent)
5235
5455
 
5236
- columns_html = "</th>\n{0}<th>".format(dindent).join(columns)
5456
+ columns_html = "</th>\n{0}<th>".format(dindent).join(self.__data_columns)
5237
5457
  html += "{0}<th>{1}</th>\n".format(dindent, columns_html)
5238
5458
  html += "{0}</tr>\n".format(indent)
5239
5459
 
5240
- for row in data:
5460
+ for row in self.__data:
5241
5461
  row_html = ["{0}<td>{1}</td>\n".format(dindent,
5242
5462
  cell) for cell in row]
5243
5463
  html += "{1}<tr>\n{0}{1}</tr>\n".format("".join(row_html), indent)
@@ -5246,6 +5466,34 @@ class DataFrame():
5246
5466
 
5247
5467
  return html
5248
5468
 
5469
+ def __get_data_columns(self):
5470
+ """
5471
+ DESCRIPTION:
5472
+ Internal function to execute the node and get the result.
5473
+
5474
+ RETURNS:
5475
+ tuple, first element represents data for the underlying query
5476
+ and second element represents the column names.
5477
+
5478
+ RAISES:
5479
+ None.
5480
+
5481
+ EXAMPLES:
5482
+ self.__get_data_columns()
5483
+ """
5484
+ self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
5485
+
5486
+ query = repr(self._metaexpr) + ' FROM ' + self._table_name
5487
+
5488
+ if self._orderby is not None:
5489
+ query += ' ORDER BY ' + self._orderby
5490
+
5491
+ # Execute the query and get the results in a list.
5492
+ self.__data, self.__data_columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
5493
+
5494
+ return self.__data, self.__data_columns
5495
+
5496
+ @collect_queryband(queryband="DF_select")
5249
5497
  def select(self, select_expression):
5250
5498
  """
5251
5499
  DESCRIPTION:
@@ -5340,7 +5588,8 @@ class DataFrame():
5340
5588
  """
5341
5589
  try:
5342
5590
  if self._metaexpr is None:
5343
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR)
5591
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
5592
+ MessageCodes.TDMLDF_INFO_ERROR)
5344
5593
 
5345
5594
  # If invalid, appropriate exception raised; Processing ahead only for valid expressions
5346
5595
  select_exp_col_list = self.__validate_select_expression(select_expression)
@@ -5398,17 +5647,20 @@ class DataFrame():
5398
5647
 
5399
5648
  # TODO: Remove this check when same column multiple selection enabled
5400
5649
  if len(select_exp_col_list) > len(df_column_list):
5401
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)),
5402
- MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
5650
+ raise TeradataMlException(
5651
+ Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)),
5652
+ MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
5403
5653
 
5404
- all_cols_exist = all(col in df_column_list for col in select_exp_col_list)
5654
+ all_cols_exist = all(col in df_column_list for col in select_exp_col_list)
5405
5655
 
5406
5656
  if not all_cols_exist:
5407
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)),
5408
- MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
5657
+ raise TeradataMlException(
5658
+ Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)),
5659
+ MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
5409
5660
 
5410
5661
  return select_exp_col_list
5411
5662
 
5663
+ @collect_queryband(queryband="DF_toPandas")
5412
5664
  def to_pandas(self, index_column=None, num_rows=99999, all_rows=False,
5413
5665
  fastexport=False, catch_errors_warnings=False, **kwargs):
5414
5666
  """
@@ -5798,7 +6050,7 @@ class DataFrame():
5798
6050
  "Pandas DataFrame", str(err)),
5799
6051
  MessageCodes.DATA_EXPORT_FAILED)
5800
6052
 
5801
- @argument_deprecation("future", ["lsuffix", "rsuffix"], True, ["lprefix", "rprefix"])
6053
+ @collect_queryband(queryband="DF_join")
5802
6054
  def join(self, other, on=None, how="left", lsuffix=None, rsuffix=None,
5803
6055
  lprefix=None, rprefix=None):
5804
6056
  """
@@ -5875,18 +6127,12 @@ class DataFrame():
5875
6127
  lsuffix:
5876
6128
  Optional Argument.
5877
6129
  Specifies the suffix to be added to the left table columns.
5878
- Note:
5879
- Behavior of the argument will change in future.
5880
- Use "lprefix" instead.
5881
6130
  Default Value: None.
5882
6131
  Types: str
5883
6132
 
5884
6133
  rsuffix:
5885
6134
  Optional Argument.
5886
6135
  Specifies the suffix to be added to the right table columns.
5887
- Note:
5888
- Behavior of the argument will change in future.
5889
- Use "rprefix" instead.
5890
6136
  Default Value: None.
5891
6137
  Types: str
5892
6138
 
@@ -6045,15 +6291,13 @@ class DataFrame():
6045
6291
  # Validate argument types
6046
6292
  _Validators._validate_function_arguments(awu_matrix)
6047
6293
 
6048
-
6049
-
6050
6294
  # If user has not provided suffix argument(s), then prefix argument(s) value(s) are passed by
6051
6295
  # user hence we will set the affix variables (laffix and raffix) with provided value(s).
6052
6296
  # affix_type is also set appropriately.
6053
6297
  if lsuffix is not None or rsuffix is not None:
6054
6298
  laffix = lsuffix
6055
6299
  raffix = rsuffix
6056
- affix_type = "suffix"
6300
+ affix_type = "suffix"
6057
6301
  else:
6058
6302
  laffix = lprefix
6059
6303
  raffix = rprefix
@@ -6079,12 +6323,13 @@ class DataFrame():
6079
6323
  if column in other_columns_lower_actual_map.keys():
6080
6324
  if laffix is None and raffix is None:
6081
6325
  raise TeradataMlException(
6082
- Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
6083
-
6326
+ Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),
6327
+ MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
6328
+
6084
6329
  # Both affixes should not be equal to perform join.
6085
6330
  if laffix == raffix and laffix is not None:
6086
6331
  raise TeradataMlException(
6087
- Messages.get_message(MessageCodes.TDMLDF_INVALID_TABLE_ALIAS,
6332
+ Messages.get_message(MessageCodes.TDMLDF_INVALID_TABLE_ALIAS,
6088
6333
  "'l{affix_type}' and 'r{affix_type}'".format(affix_type=affix_type)),
6089
6334
  MessageCodes.TDMLDF_INVALID_TABLE_ALIAS)
6090
6335
 
@@ -6120,7 +6365,7 @@ class DataFrame():
6120
6365
 
6121
6366
  if isinstance(ori_condition, str):
6122
6367
  columns = [column.strip() for column in condition.split(sep=conditional_separator)
6123
- if len(column) > 0]
6368
+ if len(column) > 0]
6124
6369
 
6125
6370
  if len(columns) != 2:
6126
6371
  invalid_join_conditions.append(condition)
@@ -6135,7 +6380,8 @@ class DataFrame():
6135
6380
 
6136
6381
  if len(invalid_join_conditions) > 0:
6137
6382
  raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_JOIN_CONDITION,
6138
- ", ".join(invalid_join_conditions)), MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
6383
+ ", ".join(invalid_join_conditions)),
6384
+ MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
6139
6385
 
6140
6386
  join_condition = " and ".join(all_join_conditions)
6141
6387
  else:
@@ -6156,14 +6402,16 @@ class DataFrame():
6156
6402
  df1_column_with_affix = self.__check_and_return_new_column_name(laffix, other_column,
6157
6403
  other_columns_lower_actual_map.keys(),
6158
6404
  "right", affix_type)
6159
- select_columns.append("{0} as {1}".format(self.__get_fully_qualified_col_name(other_column, "df1" if laffix is None else laffix),
6160
- df1_column_with_affix))
6405
+ select_columns.append("{0} as {1}".format(
6406
+ self.__get_fully_qualified_col_name(other_column, "df1" if laffix is None else laffix),
6407
+ df1_column_with_affix))
6161
6408
 
6162
6409
  df2_column_with_affix = self.__check_and_return_new_column_name(raffix, column,
6163
6410
  self_columns_lower_actual_map.keys(),
6164
6411
  "left", affix_type)
6165
- select_columns.append("{0} as {1}".format(self.__get_fully_qualified_col_name(column, "df2" if raffix is None else raffix),
6166
- df2_column_with_affix))
6412
+ select_columns.append("{0} as {1}".format(
6413
+ self.__get_fully_qualified_col_name(column, "df2" if raffix is None else raffix),
6414
+ df2_column_with_affix))
6167
6415
 
6168
6416
  # As we are creating new column name, adding it to new metadata dict for new dataframe from join.
6169
6417
  self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
@@ -6173,7 +6421,7 @@ class DataFrame():
6173
6421
  self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
6174
6422
  UtilFuncs._teradata_unquote_arg(df2_column_with_affix, "\""),
6175
6423
  other_column, df2_columns_types)
6176
-
6424
+
6177
6425
  else:
6178
6426
  # As column not present in right DataFrame, directly adding column to new metadata dict.
6179
6427
  self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, df1_columns_types)
@@ -6187,7 +6435,7 @@ class DataFrame():
6187
6435
 
6188
6436
  # Create a node in AED using _aed_join
6189
6437
  join_node_id = self._aed_utils._aed_join(self._nodeid, other._nodeid, ", ".join(select_columns), how_lc,
6190
- join_condition, "df1" if laffix is None else laffix,
6438
+ join_condition, "df1" if laffix is None else laffix,
6191
6439
  "df2" if raffix is None else raffix)
6192
6440
 
6193
6441
  # Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid and underlying table name.
@@ -6274,9 +6522,7 @@ class DataFrame():
6274
6522
  """
6275
6523
  if affix is None:
6276
6524
  return UtilFuncs._teradata_quote_arg(column, "\"", False)
6277
-
6278
- affix_type = "prefix" # TODO: [ELE-5480] Remove this line to enable suffix addition.
6279
-
6525
+
6280
6526
  # If Prefix, affix is added before column name else it is appended.
6281
6527
  df1_column_with_affix = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
6282
6528
  df1_column_with_affix = df1_column_with_affix.format(affix,
@@ -6292,7 +6538,7 @@ class DataFrame():
6292
6538
  MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS)
6293
6539
  return UtilFuncs._teradata_quote_arg(df1_column_with_affix, "\"", False)
6294
6540
 
6295
- def __add_column_type_item_to_dict(self, new_metadata_dict, new_column,column, column_types):
6541
+ def __add_column_type_item_to_dict(self, new_metadata_dict, new_column, column, column_types):
6296
6542
  """
6297
6543
  Add a column as key and datatype as a value to dictionary
6298
6544
 
@@ -6355,20 +6601,20 @@ class DataFrame():
6355
6601
  return final
6356
6602
  else:
6357
6603
  return colnames_list
6358
-
6604
+
6359
6605
  elif kind == 'mergesort':
6360
6606
  if ascending == True:
6361
6607
  return sorted(colnames_list)
6362
6608
  else:
6363
- return sorted(colnames_list, reverse=True)
6364
-
6609
+ return sorted(colnames_list, reverse=True)
6610
+
6365
6611
  elif kind == 'heapsort':
6366
- end = len(colnames_list)
6612
+ end = len(colnames_list)
6367
6613
  start = end // 2 - 1
6368
- for i in range(start, -1, -1):
6369
- self.__get_heap(colnames_list, end, i)
6370
- for i in range(end-1, 0, -1):
6371
- #swap(i, 0)
6614
+ for i in range(start, -1, -1):
6615
+ self.__get_heap(colnames_list, end, i)
6616
+ for i in range(end - 1, 0, -1):
6617
+ # swap(i, 0)
6372
6618
  colnames_list[i], colnames_list[0] = colnames_list[0], colnames_list[i]
6373
6619
  colnames_list = self.__get_heap(colnames_list, i, 0)
6374
6620
  if ascending == True:
@@ -6394,9 +6640,9 @@ class DataFrame():
6394
6640
  RETURNS:
6395
6641
  Sorted list of column names indexed at i
6396
6642
  """
6397
- l=2 * i + 1
6398
- r=2 * (i + 1)
6399
- max=i
6643
+ l = 2 * i + 1
6644
+ r = 2 * (i + 1)
6645
+ max = i
6400
6646
  if l < n and colnames_list[i] < colnames_list[l]:
6401
6647
  max = l
6402
6648
  if r < n and colnames_list[max] < colnames_list[r]:
@@ -6406,7 +6652,8 @@ class DataFrame():
6406
6652
  self.__get_heap(colnames_list, n, max)
6407
6653
  return colnames_list
6408
6654
 
6409
- def to_sql(self, table_name, if_exists='fail', primary_index=None, temporary=False, schema_name=None, types = None,
6655
+ @collect_queryband(queryband="DF_toSql")
6656
+ def to_sql(self, table_name, if_exists='fail', primary_index=None, temporary=False, schema_name=None, types=None,
6410
6657
  primary_time_index_name=None, timecode_column=None, timebucket_duration=None,
6411
6658
  timezero_date=None, columns_list=None, sequence_column=None, seq_max=None, set_table=False):
6412
6659
  """
@@ -6619,13 +6866,13 @@ class DataFrame():
6619
6866
 
6620
6867
  """
6621
6868
 
6622
- return copy_to_sql(df = self, table_name = table_name, schema_name = schema_name,
6623
- index = False, index_label = None, temporary = temporary,
6624
- primary_index = primary_index, if_exists = if_exists, types = types,
6625
- primary_time_index_name = primary_time_index_name, timecode_column = timecode_column,
6626
- timebucket_duration = timebucket_duration, timezero_date = timezero_date, columns_list = columns_list,
6627
- sequence_column = sequence_column, seq_max = seq_max, set_table = set_table)
6628
-
6869
+ return copy_to_sql(df=self, table_name=table_name, schema_name=schema_name,
6870
+ index=False, index_label=None, temporary=temporary,
6871
+ primary_index=primary_index, if_exists=if_exists, types=types,
6872
+ primary_time_index_name=primary_time_index_name, timecode_column=timecode_column,
6873
+ timebucket_duration=timebucket_duration, timezero_date=timezero_date,
6874
+ columns_list=columns_list,
6875
+ sequence_column=sequence_column, seq_max=seq_max, set_table=set_table)
6629
6876
 
6630
6877
  def _get_assign_allowed_types(self):
6631
6878
  """
@@ -6694,7 +6941,7 @@ class DataFrame():
6694
6941
  new_meta = UtilFuncs._get_metaexpr_using_parent_metaexpr(new_nodeid, new_meta)
6695
6942
  return (new_meta, new_nodeid)
6696
6943
 
6697
- def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns = None):
6944
+ def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
6698
6945
  """
6699
6946
  DESCRIPTION:
6700
6947
  Function to create a teradataml DataFrame from node.
@@ -6702,7 +6949,7 @@ class DataFrame():
6702
6949
  be overridden by the child classes if required.
6703
6950
 
6704
6951
  For example,
6705
- This will always returns a teradataml DataFrame, but for
6952
+ This will always return a teradataml DataFrame, but for
6706
6953
  GeoDataFrame, we will return teradataml DataFrame or teradataml
6707
6954
  GeoDataFrame, based on whether the resultant DataFrame contains
6708
6955
  geometry column or not.
@@ -6786,7 +7033,8 @@ class DataFrame():
6786
7033
  self.__execute_node_and_set_table_name(self._nodeid)
6787
7034
  return True
6788
7035
 
6789
- def assign(self, drop_columns = False, **kwargs):
7036
+ @collect_queryband(queryband="DF_assign")
7037
+ def assign(self, drop_columns=False, **kwargs):
6790
7038
  """
6791
7039
  DESCRIPTION:
6792
7040
  Assign new columns to a teradataml DataFrame.
@@ -7127,12 +7375,12 @@ class DataFrame():
7127
7375
  is_allowed = lambda x: isinstance(*x) and type(x[0]) != bool
7128
7376
  value_type_allowed = map(is_allowed, ((val, t) for t in allowed_types))
7129
7377
 
7130
- #if callable(val):
7378
+ # if callable(val):
7131
7379
  # err = 'Unsupported callable value for key: {}'.format(key)
7132
7380
  # raise ValueError(err)
7133
7381
 
7134
7382
  if not any(list(value_type_allowed)):
7135
- err = 'Unsupported values of type {t} for key {k}'.format(k = key, t = type(val))
7383
+ err = 'Unsupported values of type {t} for key {k}'.format(k=key, t=type(val))
7136
7384
  raise ValueError(err)
7137
7385
 
7138
7386
  if isinstance(val, ClauseElement) and not node_executed:
@@ -7153,6 +7401,7 @@ class DataFrame():
7153
7401
  msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
7154
7402
  raise TeradataMlException(msg, errcode) from err
7155
7403
 
7404
+ @collect_queryband(queryband="DF_get")
7156
7405
  def get(self, key):
7157
7406
  """
7158
7407
  DESCRIPTION:
@@ -7232,7 +7481,8 @@ class DataFrame():
7232
7481
  """
7233
7482
  return self.select(key)
7234
7483
 
7235
- def set_index(self, keys, drop = True, append = False):
7484
+ @collect_queryband(queryband="DF_setIndex")
7485
+ def set_index(self, keys, drop=True, append=False):
7236
7486
  """
7237
7487
  DESCRIPTION:
7238
7488
  Assigns one or more existing columns as the new index to a teradataml DataFrame.
@@ -7447,7 +7697,8 @@ class DataFrame():
7447
7697
  """
7448
7698
  return self._index_label
7449
7699
 
7450
- def groupby(self, columns_expr):
7700
+ @collect_queryband(queryband="DF_groupby")
7701
+ def groupby(self, columns_expr, **kwargs):
7451
7702
  """
7452
7703
  DESCRIPTION:
7453
7704
  Applies GroupBy to one or more columns of a teradataml DataFrame.
@@ -7460,6 +7711,16 @@ class DataFrame():
7460
7711
  Specifies the column name(s) to group by.
7461
7712
  Types: str OR list of Strings (str)
7462
7713
 
7714
+ kwargs:
7715
+ Optional Argument.
7716
+ Specifies keyword arguments.
7717
+
7718
+ option:
7719
+ Optional Argument.
7720
+ Specifies the GROUP BY extension to apply, such as CUBE or ROLLUP grouping sets.
7721
+ Permitted Values: "CUBE", "ROLLUP", None
7722
+ Types: str or NoneType
7723
+
7463
7724
  NOTES:
7464
7725
  1. Users can still apply teradataml DataFrame methods (filters/sort/etc) on top of the result.
7465
7726
  2. Consecutive operations of grouping, i.e., groupby_time(), resample() and groupby() are not permitted.
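
With the new "option" keyword documented above, grouping sets can be produced directly from groupby(). A hedged illustration; the table and column names are hypothetical stand-ins:

    >>> from teradataml import DataFrame
    >>> df = DataFrame("admissions_train")
    >>> # Plain grouping, unchanged behaviour.
    >>> df.groupby("stats").count()
    >>> # ROLLUP and CUBE grouping sets via the new keyword.
    >>> df.groupby(["stats", "programming"], option="ROLLUP").count()
    >>> df.groupby(["stats", "programming"], option="CUBE").sum()
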
@@ -7486,26 +7747,31 @@ class DataFrame():
7486
7747
 
7487
7748
  """
7488
7749
  # Argument validations
7489
- awu_matrix = []
7490
- awu_matrix.append(["columns_expr", columns_expr, False, (str, list), True])
7750
+ arg_info_matrix = []
7751
+ arg_info_matrix.append(["columns_expr", columns_expr, False, (str, list), True])
7752
+ option = kwargs.get("option", None)
7753
+ arg_info_matrix.append(["option", option, True, (str, type(None)), True,
7754
+ ["CUBE", "ROLLUP", None]])
7491
7755
 
7492
7756
  # Validate argument types
7493
- _Validators._validate_function_arguments(awu_matrix)
7757
+ _Validators._validate_function_arguments(arg_info_matrix)
7494
7758
 
7495
7759
  # Checking each element in passed columns to be valid column in dataframe
7496
7760
  _Validators._validate_column_exists_in_dataframe(columns_expr, self._metaexpr)
7497
7761
 
7498
7762
  try:
7499
- column_list=[]
7500
- unsupported_types = ['BLOB', 'CLOB', 'PERIOD_DATE', 'PERIOD_TIME', 'PERIOD_TIMESTAMP', 'ARRAY', 'VARRAY', 'XML', 'JSON']
7501
- type_expr=[]
7763
+ column_list = []
7764
+ unsupported_types = ['BLOB', 'CLOB', 'PERIOD_DATE', 'PERIOD_TIME', 'PERIOD_TIMESTAMP', 'ARRAY', 'VARRAY',
7765
+ 'XML', 'JSON']
7766
+ type_expr = []
7502
7767
  invalid_types = []
7503
7768
  # check for consecutive groupby operations
7504
- if isinstance(self, DataFrameGroupBy) or isinstance(self, DataFrameGroupByTime) :
7505
- raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_OPERATION), MessageCodes.UNSUPPORTED_OPERATION)
7769
+ if isinstance(self, DataFrameGroupBy) or isinstance(self, DataFrameGroupByTime):
7770
+ raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_OPERATION),
7771
+ MessageCodes.UNSUPPORTED_OPERATION)
7506
7772
 
7507
7773
  if (isinstance(columns_expr, list)):
7508
- column_list=columns_expr
7774
+ column_list = columns_expr
7509
7775
 
7510
7776
  elif (isinstance(columns_expr, str)):
7511
7777
  column_list.append(columns_expr)
@@ -7530,15 +7796,15 @@ class DataFrame():
7530
7796
 
7531
7797
  groupbyexpr = ', '.join(UtilFuncs._teradata_quote_arg(col, "\"", False) for col in column_list)
7532
7798
  groupbyObj = DataFrameGroupBy(self._nodeid, self._metaexpr, self._column_names_and_types, self.columns,
7533
- groupbyexpr, column_list)
7799
+ groupbyexpr, column_list, option)
7534
7800
  return groupbyObj
7535
7801
  except TeradataMlException:
7536
7802
  raise
7537
7803
 
7538
- def __group_time_series_data(self, timebucket_duration, timebucket_duration_arg_name = "timebucket_duration",
7539
- value_expression = None, timecode_column = None,
7540
- timecode_column_arg_name = "timecode_column", sequence_column = None,
7541
- fill = None, fill_arg_name = "fill"):
7804
+ def __group_time_series_data(self, timebucket_duration, timebucket_duration_arg_name="timebucket_duration",
7805
+ value_expression=None, timecode_column=None,
7806
+ timecode_column_arg_name="timecode_column", sequence_column=None,
7807
+ fill=None, fill_arg_name="fill"):
7542
7808
  """
7543
7809
  DESCRIPTION:
7544
7810
  Internal function to resample/group time series data using Group By Time and a column.
@@ -7782,7 +8048,8 @@ class DataFrame():
7782
8048
 
7783
8049
  if len(invalid_types) > 0:
7784
8050
  raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, invalid_types,
7785
- "ANY, except following {}".format(unsupported_types)),
8051
+ "ANY, except following {}".format(
8052
+ unsupported_types)),
7786
8053
  MessageCodes.UNSUPPORTED_DATATYPE)
7787
8054
 
7788
8055
  groupby_column_expr = ', '.join(UtilFuncs._teradata_quote_arg(col, "\"", False)
@@ -7792,7 +8059,7 @@ class DataFrame():
7792
8059
 
7793
8060
  groupbyObj = DataFrameGroupByTime(nodeid=self._nodeid, metaexpr=self._metaexpr,
7794
8061
  column_names_and_types=self._column_names_and_types, columns=self.columns,
7795
- groupby_value_expr = groupby_column_expr,
8062
+ groupby_value_expr=groupby_column_expr,
7796
8063
  column_list=group_by_column_list, timebucket_duration=timebucket_duration,
7797
8064
  value_expression=value_expression, timecode_column=timecode_column,
7798
8065
  sequence_column=sequence_column, fill=fill)
@@ -7800,8 +8067,9 @@ class DataFrame():
7800
8067
  except TeradataMlException:
7801
8068
  raise
7802
8069
 
7803
- def groupby_time(self, timebucket_duration, value_expression = None, timecode_column = None, sequence_column = None,
7804
- fill = None):
8070
+ @collect_queryband(queryband="DF_groupbyTime")
8071
+ def groupby_time(self, timebucket_duration, value_expression=None, timecode_column=None, sequence_column=None,
8072
+ fill=None):
7805
8073
  """
7806
8074
  DESCRIPTION:
7807
8075
  Apply Group By Time to one or more columns of a teradataml DataFrame.
@@ -8079,11 +8347,12 @@ class DataFrame():
8079
8347
 
8080
8348
  """
8081
8349
  return self.__group_time_series_data(timebucket_duration=timebucket_duration, value_expression=value_expression,
8082
- timecode_column = timecode_column, sequence_column = sequence_column,
8083
- fill = fill)
8350
+ timecode_column=timecode_column, sequence_column=sequence_column,
8351
+ fill=fill)
8084
8352
 
8085
- def resample(self, rule, value_expression = None, on = None, sequence_column = None,
8086
- fill_method = None):
8353
+ @collect_queryband(queryband="DF_resample")
8354
+ def resample(self, rule, value_expression=None, on=None, sequence_column=None,
8355
+ fill_method=None):
8087
8356
  """
8088
8357
  DESCRIPTION:
8089
8358
  Resample time series data. This function allows grouping done by time on
@@ -8360,10 +8629,11 @@ class DataFrame():
8360
8629
  """
8361
8630
  return self.__group_time_series_data(timebucket_duration=rule, timebucket_duration_arg_name="rule",
8362
8631
  value_expression=value_expression, timecode_column_arg_name="on",
8363
- timecode_column = on, sequence_column = sequence_column,
8364
- fill = fill_method, fill_arg_name="fill_method")
8632
+ timecode_column=on, sequence_column=sequence_column,
8633
+ fill=fill_method, fill_arg_name="fill_method")
8365
8634
 
8366
- def get_values(self, num_rows = 99999):
8635
+ @collect_queryband(queryband="DF_getValues")
8636
+ def get_values(self, num_rows=99999):
8367
8637
  """
8368
8638
  DESCRIPTION:
8369
8639
  Retrieves all values (only) present in a teradataml DataFrame.
@@ -8548,6 +8818,7 @@ class DataFrame():
8548
8818
  dimension = self.shape
8549
8819
  return dimension[0] * dimension[1]
8550
8820
 
8821
+ @collect_queryband(queryband="DF_merge")
8551
8822
  def merge(self, right, on=None, how="inner", left_on=None, right_on=None, use_index=False,
8552
8823
  lsuffix=None, rsuffix=None):
8553
8824
  """
@@ -8555,6 +8826,7 @@ class DataFrame():
8555
8826
  Merges two teradataml DataFrames together.
8556
8827
 
8557
8828
  Supported merge operations are:
8829
+ - cross: Returns the Cartesian product of the two teradataml DataFrames.
8558
8830
  - inner: Returns only matching rows, non-matching rows are eliminated.
8559
8831
  - left: Returns all matching rows plus non-matching rows from the left teradataml DataFrame.
8560
8832
  - right: Returns all matching rows plus non-matching rows from the right teradataml DataFrame.
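
A short, hedged illustration of the newly listed "cross" merge; df1 and df2 are hypothetical DataFrames, and the suffixes are supplied only to disambiguate any overlapping column names (exact argument requirements may differ):

    >>> result = df1.merge(right=df2, how="cross", lsuffix="l", rsuffix="r")
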
@@ -8767,27 +9039,26 @@ class DataFrame():
8767
9039
  Messages.get_message(MessageCodes.MUST_PASS_ARGUMENT, "left_on", "right_on"),
8768
9040
  MessageCodes.MUST_PASS_ARGUMENT)
8769
9041
 
8770
- if isinstance(on,list):
9042
+ if isinstance(on, list):
8771
9043
  join_conditions = on
8772
9044
  elif isinstance(on, (str, ColumnExpression)):
8773
9045
  join_conditions = [on]
8774
9046
  else:
8775
9047
  join_conditions = []
8776
9048
 
8777
-
8778
9049
  if isinstance(left_on, list) and isinstance(right_on, list) and len(left_on) != len(right_on):
8779
9050
  raise TeradataMlException(
8780
- Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
8781
- MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
9051
+ Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
9052
+ MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
8782
9053
 
8783
9054
  elif isinstance(left_on, list) and isinstance(right_on, (str, ColumnExpression)) and len(left_on) != 1:
8784
9055
  raise TeradataMlException(
8785
- Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
8786
- MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
9056
+ Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
9057
+ MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
8787
9058
 
8788
9059
  elif isinstance(right_on, list) and isinstance(left_on, (str, ColumnExpression)) and len(right_on) != 1:
8789
9060
  raise TeradataMlException(
8790
- Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
9061
+ Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
8791
9062
  MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
8792
9063
 
8793
9064
  if left_on is not None and not isinstance(left_on, list):
@@ -8812,7 +9083,6 @@ class DataFrame():
8812
9083
  if isinstance(right_on[index], ColumnExpression):
8813
9084
  right_on[index] = right_on[index].compile()
8814
9085
 
8815
-
8816
9086
  if left_on is not None and right_on is not None:
8817
9087
  for left_column, right_column in zip(left_on, right_on):
8818
9088
  join_conditions.append("{} = {}".format(tdp.quote(left_column), tdp.quote(right_column)))
@@ -8828,7 +9098,7 @@ class DataFrame():
8828
9098
 
8829
9099
  if use_index:
8830
9100
  if self._index_label is None or right._index_label is None:
8831
- raise TeradataMlException(
9101
+ raise TeradataMlException(
8832
9102
  Messages.get_message(MessageCodes.TDMLDF_INDEXES_ARE_NONE), MessageCodes.TDMLDF_INDEXES_ARE_NONE)
8833
9103
 
8834
9104
  left_index_labels = self._index_label
@@ -8841,9 +9111,9 @@ class DataFrame():
8841
9111
  for left_index_label, right_index_label in zip(left_index_labels, right_index_labels):
8842
9112
  join_conditions.append("{} = {}".format(tdp.quote(left_index_label), tdp.quote(right_index_label)))
8843
9113
 
8844
-
8845
9114
  return self.join(other=right, on=join_conditions, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
8846
9115
 
9116
+ @collect_queryband(queryband="DF_squeeze")
8847
9117
  def squeeze(self, axis=None):
8848
9118
  """
8849
9119
  DESCRIPTION:
@@ -8955,7 +9225,7 @@ class DataFrame():
8955
9225
  num_row, num_col = self.shape
8956
9226
 
8957
9227
  # Check if the number of elements in DF = 1
8958
- if (num_row, num_col) == (1,1) and axis is None:
9228
+ if (num_row, num_col) == (1, 1) and axis is None:
8959
9229
  # To get the single row/column value in the DF, we need to execute the node
8960
9230
  # Generate/Execute AED nodes
8961
9231
  self.__execute_node_and_set_table_name(self._nodeid)
@@ -8981,12 +9251,13 @@ class DataFrame():
8981
9251
  return self
8982
9252
 
8983
9253
  if axis == 1:
8984
- return Series._from_dataframe(self, axis = 1)
9254
+ return Series._from_dataframe(self, axis=1)
8985
9255
  else:
8986
9256
  # TODO : Research and add capabilities to handle rowexpression based return objects
8987
9257
  # For now, returning the DataFrame as is
8988
9258
  return self
8989
9259
 
9260
+ @collect_queryband(queryband="DF_sortIndex")
8990
9261
  def sort_index(self, axis=0, ascending=True, kind='quicksort'):
8991
9262
  """
8992
9263
  DESCRIPTION:
@@ -9093,6 +9364,7 @@ class DataFrame():
9093
9364
  except TeradataMlException:
9094
9365
  raise
9095
9366
 
9367
+ @collect_queryband(queryband="DF_concat")
9096
9368
  def concat(self, other, join='OUTER', allow_duplicates=True, sort=False, ignore_index=False):
9097
9369
  """
9098
9370
  DESCRIPTION:
@@ -9278,14 +9550,14 @@ class DataFrame():
9278
9550
  if isinstance(samples, float) and samples > 1:
9279
9551
  raise TeradataMlException(
9280
9552
  Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
9281
- "greater than 0 and less than or equal to 1"),
9282
- MessageCodes.INVALID_ARG_VALUE)
9553
+ "greater than 0 and less than or equal to 1"),
9554
+ MessageCodes.INVALID_ARG_VALUE)
9283
9555
  if isinstance(samples, list) and all(isinstance(item, float) for item in samples) \
9284
- and sum(samples) > 1:
9556
+ and sum(samples) > 1:
9285
9557
  raise TeradataMlException(
9286
9558
  Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
9287
- "a list having sum of all elements greater than 0 and less than or equal to 1" ),
9288
- MessageCodes.INVALID_ARG_VALUE)
9559
+ "a list having sum of all elements greater than 0 and less than or equal to 1"),
9560
+ MessageCodes.INVALID_ARG_VALUE)
9289
9561
 
9290
9562
  return True
9291
9563
 
@@ -9316,10 +9588,10 @@ class DataFrame():
9316
9588
 
9317
9589
  # Raise exception if the length of list is greater than 16.
9318
9590
  if len(samples) > 16:
9319
- raise TeradataMlException(
9591
+ raise TeradataMlException(
9320
9592
  Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
9321
- "a list having less than or equal to 16 samples"),
9322
- MessageCodes.INVALID_ARG_VALUE)
9593
+ "a list having less than or equal to 16 samples"),
9594
+ MessageCodes.INVALID_ARG_VALUE)
9323
9595
 
9324
9596
  return True
9325
9597
 
@@ -9354,26 +9626,26 @@ class DataFrame():
9354
9626
 
9355
9627
  # Raise exception if number of rows given are negative.
9356
9628
  if isinstance(samples, (int, float)) and samples < 0 or isinstance(samples, list) \
9357
- and any(item < 0 for item in samples):
9629
+ and any(item < 0 for item in samples):
9358
9630
  raise TeradataMlException(
9359
9631
  Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
9360
- "greater than 0" ),
9361
- MessageCodes.INVALID_ARG_VALUE)
9632
+ "greater than 0"),
9633
+ MessageCodes.INVALID_ARG_VALUE)
9362
9634
 
9363
9635
  # Raise exception if fractions specified as 0.
9364
- if isinstance(samples, float) and samples == 0 or (isinstance(samples, list) \
9365
- and all(isinstance(item, float) for item in samples)
9366
- and any(item == 0 for item in samples)):
9636
+ if isinstance(samples, float) and samples == 0 or (isinstance(samples, list) \
9637
+ and all(isinstance(item, float) for item in samples)
9638
+ and any(item == 0 for item in samples)):
9367
9639
  raise TeradataMlException(
9368
9640
  Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
9369
- "greater than 0" ),
9370
- MessageCodes.INVALID_ARG_VALUE)
9371
-
9641
+ "greater than 0"),
9642
+ MessageCodes.INVALID_ARG_VALUE)
9372
9643
 
9373
9644
  return True
9374
9645
 
9375
- def sample(self, n = None, frac = None, replace = False, randomize = False, case_when_then = None, case_else = None,
9376
- stratify_column = None, seed = None, id_column = None):
9646
+ @collect_queryband(queryband="DF_sample")
9647
+ def sample(self, n=None, frac=None, replace=False, randomize=False, case_when_then=None, case_else=None,
9648
+ stratify_column=None, seed=None, id_column=None):
9377
9649
  """
9378
9650
  DESCRIPTION:
9379
9651
  Allows to sample few rows from dataframe directly or based on conditions.
@@ -9679,21 +9951,21 @@ class DataFrame():
9679
9951
  try:
9680
9952
  if n is not None and frac is not None:
9681
9953
  raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
9682
- "n", "frac"),
9683
- MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9954
+ "n", "frac"),
9955
+ MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9684
9956
  if n is not None and case_when_then is not None:
9685
9957
  raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
9686
- "n", "case_when_then"),
9687
- MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9958
+ "n", "case_when_then"),
9959
+ MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9688
9960
  if frac is not None and case_when_then is not None:
9689
9961
  raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
9690
- "frac", "case_when_then"),
9691
- MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9962
+ "frac", "case_when_then"),
9963
+ MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9692
9964
  _Validators._validate_dependent_argument("case_else", case_else, "case_when_then", case_when_then)
9693
9965
  if n is None and frac is None and case_when_then is None:
9694
- raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
9695
- "n or frac", "case_when_then"),
9696
- MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9966
+ raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
9967
+ "n or frac", "case_when_then"),
9968
+ MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
9697
9969
 
9698
9970
  # Argument validations
9699
9971
  awu_matrix = []
@@ -9739,7 +10011,7 @@ class DataFrame():
9739
10011
  self.__validate_sum_of_list_for_sample_api(frac, "frac")
9740
10012
  self.__validate_number_of_rows_for_sample_api(frac, "frac")
9741
10013
  case_when_then = {}
9742
- list_of_fracs = frac
10014
+ list_of_fracs = frac
9743
10015
 
9744
10016
  # When stratify column is passed for sample then perform TrainTestSplit
9745
10017
  # for data sampling.
@@ -9750,9 +10022,9 @@ class DataFrame():
9750
10022
  # For stratify column, the train and test split sizes must sum up to 1.
9751
10023
  if len(list_of_fracs) == 1:
9752
10024
  list_of_fracs.append(1 - list_of_fracs[0])
9753
-
10025
+
9754
10026
  # Call TrainTestSplit and return the result dataframe.
9755
- TrainTestSplit_out = TrainTestSplit(data = self,
10027
+ TrainTestSplit_out = TrainTestSplit(data=self,
9756
10028
  id_column=id_column,
9757
10029
  train_size=list_of_fracs[0],
9758
10030
  test_size=list_of_fracs[1],
@@ -9762,11 +10034,11 @@ class DataFrame():
9762
10034
  # for backward compatibility.
9763
10035
  _sampled_df = TrainTestSplit_out.result
9764
10036
  # Column name "TD_IsTrainRow" renamed to "sampleid".
9765
- return _sampled_df.assign(sampleid = case([
9766
- (_sampled_df.TD_IsTrainRow == 0, 2)],
9767
- else_=1)).drop("TD_IsTrainRow", axis = 1)
9768
-
9769
-
10037
+ return _sampled_df.assign(sampleid=case([
10038
+ (_sampled_df.TD_IsTrainRow == 0, 2)],
10039
+ else_=1)).drop("TD_IsTrainRow", axis=1)
10040
+
10041
+
9770
10042
 
9771
10043
  else:
9772
10044
  # Creating OrderDict for 'case_when_then' so that order of keys doesn't change after
@@ -9774,8 +10046,8 @@ class DataFrame():
9774
10046
  case_when_then = OrderedDict(case_when_then)
9775
10047
  if len(case_when_then) > 16:
9776
10048
  raise TeradataMlException(
9777
- Messages.get_message(MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES, "case_when_then"),
9778
- MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES)
10049
+ Messages.get_message(MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES, "case_when_then"),
10050
+ MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES)
9779
10051
 
9780
10052
  transformed_case_when_then = OrderedDict()
9781
10053
  for when_condition, then_sample_number in case_when_then.items():
@@ -9791,12 +10063,12 @@ class DataFrame():
9791
10063
 
9792
10064
  # Validating values in the dict.
9793
10065
  if isinstance(then_sample_number, int) or (isinstance(then_sample_number, list) \
9794
- and isinstance(then_sample_number[0], int)):
10066
+ and isinstance(then_sample_number[0], int)):
9795
10067
  _Validators._validate_function_arguments([["Values in case_when_then", then_sample_number,
9796
- True, (int, list)]])
10068
+ True, (int, list)]])
9797
10069
  else:
9798
10070
  _Validators._validate_function_arguments([["Values in case_when_then", then_sample_number,
9799
- True, ((float, list))]])
10071
+ True, ((float, list))]])
9800
10072
 
9801
10073
  if isinstance(then_sample_number, list):
9802
10074
  self.__validate_len_of_list_for_sample_api(then_sample_number, "case_when_then")
@@ -9818,11 +10090,11 @@ class DataFrame():
9818
10090
 
9819
10091
  case_else_awu_matrix = []
9820
10092
  if isinstance(case_else[0], int):
9821
- case_else_awu_matrix.append(['Number of rows or fractions in case_else',
9822
- case_else, True, (int, list)])
10093
+ case_else_awu_matrix.append(['Number of rows or fractions in case_else',
10094
+ case_else, True, (int, list)])
9823
10095
  else:
9824
- case_else_awu_matrix.append(['Number of rows or fractions in case_else',
9825
- case_else, True, (float, list)])
10096
+ case_else_awu_matrix.append(['Number of rows or fractions in case_else',
10097
+ case_else, True, (float, list)])
9826
10098
 
9827
10099
  # Validating argument values for 'case_else'.
9828
10100
  _Validators._validate_function_arguments(case_else_awu_matrix)
@@ -9848,16 +10120,25 @@ class DataFrame():
9848
10120
  for column in self.columns:
9849
10121
  self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column,
9850
10122
  column, df_columns_types)
9851
-
10123
+
9852
10124
  # As we are creating new column name, adding it to new metadata dict
9853
10125
  new_metaexpr_columns_types[sample_column] = INTEGER()
9854
10126
  sample_node_id = self._aed_utils._aed_sample(self._nodeid, ",".join(selected_columns),
9855
10127
  list_of_fracs, replace, randomize, case_when_then, case_else_var)
9856
- column_info = ((col_name, col_type) for col_name, col_type in
10128
+
10129
+ column_info = ((col_name, col_type) for col_name, col_type in
9857
10130
  new_metaexpr_columns_types.items())
9858
10131
  # Get new metaexpr for sample_node_id
9859
10132
  new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sample_node_id, column_info, is_persist=True)
9860
- return self._create_dataframe_from_node(sample_node_id, new_metaexpr, self._index_label)
10133
+
10134
+ # Make this non-lazy. Added this in order to fix https://teradata-pe.atlassian.net/browse/ELE-6368
10135
+ # Cannot use __execute_node_and_set_table_name because self points to original df.
10136
+ # Hence, setting the __table_name with _execute_node_return_db_object_name.
10137
+
10138
+ df = self._create_dataframe_from_node(sample_node_id, new_metaexpr, self._index_label)
10139
+ df.__table_name = df_utils._execute_node_return_db_object_name(sample_node_id, new_metaexpr)
10140
+
10141
+ return df
9861
10142
 
9862
10143
  except TeradataMlException:
9863
10144
  raise
@@ -9873,7 +10154,8 @@ class DataFrame():
9873
10154
  msg = Messages.get_message(errcode)
9874
10155
  raise TeradataMlException(msg, errcode) from err
9875
10156
 
9876
- def show_query(self, full_query = False):
10157
+ @collect_queryband(queryband="DF_showQuery")
10158
+ def show_query(self, full_query=False):
9877
10159
  """
9878
10160
  DESCRIPTION:
9879
10161
  Function returns underlying SQL for the teradataml DataFrame. It is the same
@@ -10016,6 +10298,7 @@ class DataFrame():
10016
10298
  msg = Messages.get_message(errcode)
10017
10299
  raise TeradataMlException(msg, errcode) from err
10018
10300
 
10301
+ @collect_queryband(queryband="DF_mapRow")
10019
10302
  def map_row(self,
10020
10303
  user_function,
10021
10304
  exec_mode='IN-DB',
@@ -10083,9 +10366,6 @@ class DataFrame():
10083
10366
  * LOCAL: Execute the function locally on sample data (at
10084
10367
  most "num_rows" rows) from the teradataml
10085
10368
  DataFrame.
10086
- * SANDBOX: Execute the function locally within a sandbox
10087
- environment on sample data (at most "num_rows"
10088
- rows) from the teradataml DataFrame.
10089
10369
  Default value: 'IN-DB'
10090
10370
  Types: str
10091
10371
 
@@ -10103,7 +10383,7 @@ class DataFrame():
10103
10383
  Optional Argument.
10104
10384
  Specifies the maximum number of sample rows to use from the
10105
10385
  teradataml DataFrame to apply the user defined function to when
10106
- "exec_mode" is 'LOCAL' or 'SANDBOX'.
10386
+ "exec_mode" is 'LOCAL'.
10107
10387
  Default value: 1000
10108
10388
  Types: int
10109
10389
 
@@ -10383,6 +10663,7 @@ class DataFrame():
10383
10663
 
10384
10664
  return tbl_op_util.execute()
10385
10665
 
10666
+ @collect_queryband(queryband="DF_mapPartition")
10386
10667
  def map_partition(self,
10387
10668
  user_function,
10388
10669
  exec_mode='IN-DB',
@@ -10454,9 +10735,6 @@ class DataFrame():
10454
10735
  * LOCAL: Execute the function locally on sample data (at
10455
10736
  most "num_rows" rows) from the teradataml
10456
10737
  DataFrame.
10457
- * SANDBOX: Execute the function locally within a sandbox
10458
- environment on sample data (at most "num_rows"
10459
- rows) from the teradataml DataFrame.
10460
10738
  Default value: 'IN-DB'
10461
10739
  Types: str
10462
10740
 
@@ -10474,7 +10752,7 @@ class DataFrame():
10474
10752
  Optional Argument.
10475
10753
  Specifies the maximum number of sample rows to use from the
10476
10754
  teradataml DataFrame to apply the user defined function to when
10477
- "exec_mode" is 'LOCAL' or 'SANDBOX'.
10755
+ "exec_mode" is 'LOCAL'.
10478
10756
  Default value: 1000
10479
10757
  Types: int
10480
10758
 
@@ -10795,6 +11073,7 @@ class DataFrame():
10795
11073
 
10796
11074
  return tbl_op_util.execute()
10797
11075
 
11076
+ @collect_queryband(queryband="DF_apply")
10798
11077
  def apply(self,
10799
11078
  user_function,
10800
11079
  exec_mode='REMOTE',
@@ -11148,8 +11427,8 @@ class DataFrame():
11148
11427
  # When returns argument is not specified, assume output schema
11149
11428
  # is same as input table schema.
11150
11429
  default_returns = OrderedDict(zip(self.columns,
11151
- [col.type for col in
11152
- self._metaexpr.c]))
11430
+ [col.type for col in
11431
+ self._metaexpr.c]))
11153
11432
  returns = kwargs.pop('returns', default_returns)
11154
11433
  arg_info_matrix.append(["returns", returns, False, (dict)])
11155
11434
 
@@ -11194,6 +11473,7 @@ class DataFrame():
11194
11473
 
11195
11474
  return tbl_op_util.execute()
11196
11475
 
11476
+ @collect_queryband(queryband="DF_window")
11197
11477
  def window(self,
11198
11478
  partition_columns=None,
11199
11479
  order_columns=None,
@@ -11240,7 +11520,7 @@ class DataFrame():
11240
11520
  columns of a teradataml DataFrame.
11241
11521
  3. "partition_columns" supports only columns specified in
11242
11522
  groupby function, if window is initiated on DataFrameGroupBy.
11243
- Types: str OR list of Strings (str)
11523
+ Types: str OR list of Strings (str) OR ColumnExpression OR list of ColumnExpressions
11244
11524
 
11245
11525
  order_columns:
11246
11526
  Optional Argument.
@@ -11254,17 +11534,24 @@ class DataFrame():
11254
11534
  columns of a teradataml DataFrame.
11255
11535
  2. "order_columns" supports only columns specified in
11256
11536
  groupby, if window is initiated on DataFrameGroupBy.
11257
-
11258
- Types: str OR list of Strings (str)
11537
+ 3. When ColumnExpression(s) is(are) passed to "order_columns", then the
11538
+ corresponding expression takes precedence over arguments
11539
+ "sort_ascending" and "nulls_first". Say, ColumnExpression is col1, then
11540
+ 1. col1.asc() or col1.desc() is effective irrespective of "sort_ascending".
11541
+ 2. col1.nulls_first() or col1.nulls_last() is effective irrespective of "nulls_first".
11542
+ 3. Any combination of the above two takes precedence over "sort_ascending" and "nulls_first".
11543
+ Types: str OR list of Strings (str) OR ColumnExpression OR list of ColumnExpressions
11259
11544
 
11260
11545
  sort_ascending:
11261
11546
  Optional Argument.
11262
11547
  Specifies whether column ordering should be in ascending or
11263
11548
  descending order.
11264
11549
  Default Value: True (ascending)
11265
- Note:
11266
- When "order_columns" argument is not specified, this argument
11267
- is ignored.
11550
+ Notes:
11551
+ * When "order_columns" argument is not specified, this argument
11552
+ is ignored.
11553
+ * When ColumnExpression(s) is(are) passed to "order_columns", then the
11554
+ argument is ignored.
11268
11555
  Types: bool
11269
11556
 
11270
11557
  nulls_first:
@@ -11272,9 +11559,11 @@ class DataFrame():
11272
11559
  Specifies whether null results are to be listed first or last
11273
11560
  or scattered.
11274
11561
  Default Value: None
11275
- Note:
11276
- When "order_columns" argument is not specified, this argument
11277
- is ignored.
11562
+ Notes:
11563
+ * When "order_columns" argument is not specified, this argument
11564
+ is ignored.
11565
+ * When "order_columns" is a ColumnExpression(s), this argument
11566
+ is ignored.
11278
11567
  Types: bool
11279
11568
 
11280
11569
  window_start_point:
@@ -11362,19 +11651,18 @@ class DataFrame():
11362
11651
  # between unbounded preceding and 3 preceding with
11363
11652
  # "partition_columns" and "order_columns" argument with
11364
11653
  # default sorting.
11365
- >>> window = df.window(partition_columns="Feb",
11366
- ... order_columns=["Feb", "datetime"],
11654
+ >>> window = df.window(partition_columns=df.Feb,
11655
+ ... order_columns=[df.Feb, "datetime"],
11367
11656
  ... window_start_point=None,
11368
11657
  ... window_end_point=-3)
11369
11658
  >>>
11370
11659
 
11371
11660
  # Example 3: Create a moving (rolling) window with rows between
11372
- # current row and 3 following with sorting done on 'Feb',
11373
- # 'datetime' columns in descending order and
11374
- # "partition_columns" argument.
11375
- >>> window = df.window(partition_columns="Feb",
11376
- ... order_columns=["Feb", "datetime"],
11377
- ... sort_ascending=False,
11661
+ # current row and 3 following, with sorting done on the 'Feb'
11662
+ # column in ascending order and the 'datetime' column in descending order,
11663
+ # and "partition_columns" argument.
11664
+ >>> window = df.window(partition_columns=df.Feb,
11665
+ ... order_columns=[df.Feb.asc(), df.datetime.desc()],
11378
11666
  ... window_start_point=0,
11379
11667
  ... window_end_point=3)
11380
11668
  >>>
@@ -11384,30 +11672,26 @@ class DataFrame():
11384
11672
  # sorting done on 'Feb', 'datetime' columns in ascending
11385
11673
  # order and NULL values in 'Feb', 'datetime'
11386
11674
  # columns appears at last.
11387
- >>> window = df.window(partition_columns="Feb",
11388
- ... order_columns=["Feb", "datetime"],
11389
- ... nulls_first=False,
11675
+ >>> window = df.window(partition_columns=df.Feb,
11676
+ ... order_columns=[df.Feb.nulls_first(), df.datetime.nulls_first()],
11390
11677
  ... window_start_point=0,
11391
11678
  ... window_end_point=None)
11392
11679
  >>>
11393
11680
 
11394
11681
  # Example 5: Create a grouping window, with sorting done on 'Feb',
11395
- # 'datetime' columns in ascending order and NULL values
11396
- # in 'Feb', 'datetime' columns appears at last.
11682
+ # 'datetime' columns in ascending order, with NULL values
11683
+ # in the 'Feb' column appearing first and in the 'datetime' column
11684
+ # appearing last.
11397
11685
  >>> window = df.window(partition_columns="Feb",
11398
- ... order_columns=["Feb", "datetime"],
11399
- ... sort_ascending=False,
11400
- ... nulls_first=False,
11686
+ ... order_columns=[df.Feb.nulls_first(), df.datetime.nulls_last()],
11401
11687
  ... window_start_point=None,
11402
11688
  ... window_end_point=None)
11403
11689
  >>>
11404
11690
 
11405
11691
  # Example 6: Create a window on a teradataml DataFrame, which
11406
11692
  # ignores all the parameters while creating window.
11407
- >>> window = df.window(partition_columns="Feb",
11408
- ... order_columns=["Feb", "datetime"],
11409
- ... sort_ascending=False,
11410
- ... nulls_first=False,
11693
+ >>> window = df.window(partition_columns=df.Feb,
11694
+ ... order_columns=[df.Feb.desc().nulls_last(), df.datetime.desc().nulls_last()],
11411
11695
  ... ignore_window=True)
11412
11696
  >>>
11413
11697
 
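A short sketch of the precedence described in the notes above: ordering attached to a ColumnExpression (asc()/desc(), nulls_first()/nulls_last()) overrides the "sort_ascending" and "nulls_first" keywords, which still apply to plain column names. It reuses the 'Feb' and 'datetime' columns from the examples and shows only what the notes state.

    >>> # .desc() on the ColumnExpression is honoured, so the ordering is
    >>> # descending even though sort_ascending=True is also passed.
    >>> window = df.window(partition_columns=df.Feb,
    ...                    order_columns=[df.Feb.desc()],
    ...                    sort_ascending=True)
    >>> # Plain column names still follow the keyword arguments: 'datetime'
    >>> # is sorted descending with NULL values listed first.
    >>> window = df.window(partition_columns=df.Feb,
    ...                    order_columns=["datetime"],
    ...                    sort_ascending=False,
    ...                    nulls_first=True)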
@@ -11462,6 +11746,7 @@ class DataFrame():
11462
11746
  window_end_point=window_end_point,
11463
11747
  ignore_window=ignore_window)
11464
11748
 
11749
+ @collect_queryband(queryband="DF_dropDuplicate")
11465
11750
  def drop_duplicate(self, column_names=None):
11466
11751
  """
11467
11752
  DESCRIPTION:
@@ -11535,6 +11820,7 @@ class DataFrame():
11535
11820
  new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items())
11536
11821
  return self._create_dataframe_from_node(sel_nodeid, new_metaexpr, self._index_label)
11537
11822
 
11823
+ @collect_queryband(queryband="DF_toCsv")
11538
11824
  def to_csv(self, csv_file,
11539
11825
  num_rows=99999,
11540
11826
  all_rows=False,
@@ -11760,10 +12046,11 @@ class DataFrame():
11760
12046
  Messages.get_message(MessageCodes.DATA_EXPORT_FAILED, "to_csv",
11761
12047
  "CSV file", str(err)),
11762
12048
  MessageCodes.DATA_EXPORT_FAILED)
11763
-
12049
+
12050
+ @collect_queryband(queryband="DF_pivot")
11764
12051
  def pivot(self,
11765
12052
  columns=None,
11766
- aggfuncs=None,
12053
+ aggfuncs=None,
11767
12054
  limit_combinations=False,
11768
12055
  margins=None,
11769
12056
  returns=None,
@@ -12110,7 +12397,7 @@ class DataFrame():
12110
12397
  if columns_arg_req and isinstance(columns, dict):
12111
12398
  expected_value_types = (int, float, str, _ListOf(int), _ListOf(float), _ListOf(str), DataFrame)
12112
12399
  _Validators._validate_dict_argument_key_value(arg_name="columns", arg_dict=columns,
12113
- key_types=(ColumnExpression, ),
12400
+ key_types=(ColumnExpression,),
12114
12401
  value_types=expected_value_types)
12115
12402
  if margins:
12116
12403
  _Validators._validate_dict_argument_key_value(arg_name="margins", arg_dict=margins,
@@ -12159,7 +12446,7 @@ class DataFrame():
12159
12446
  "DataFrame specified as value in 'columns' argument "
12160
12447
  "should have only one column.")
12161
12448
  raise ValueError(err_)
12162
- _column_value = [* (i[0] for i in _v_df.drop_duplicate().get_values())]
12449
+ _column_value = [*(i[0] for i in _v_df.drop_duplicate().get_values())]
12163
12450
  else:
12164
12451
  # We are allowing users to pass an int, str, float or list of int, float, str.
12165
12452
  # Convert it to list, if it is not a list.
@@ -12238,7 +12525,7 @@ class DataFrame():
12238
12525
  non_participating_columns = [col for col in self.columns if col not in participating_columns]
12239
12526
 
12240
12527
  # Generating WITH clause.
12241
- with_clause, seperator = "", ""
12528
+ with_clause, seperator = "", ""
12242
12529
  with_clause_column_names = []
12243
12530
  if margins:
12244
12531
  # margins will be a dict. Key is analytic function name. Value can be a tuple or list of tuple.
@@ -12284,7 +12571,8 @@ class DataFrame():
12284
12571
  sql = "SELECT * FROM {} PIVOT ({for_clause} {with_clause}) {derived_table_clause}".format(
12285
12572
  self._table_name, for_clause=for_clause, with_clause=with_clause, derived_table_clause=tmp_clause)
12286
12573
  return DataFrame.from_query(sql)
12287
-
12574
+
12575
+ @collect_queryband(queryband="DF_unpivot")
12288
12576
  def unpivot(self,
12289
12577
  columns=None,
12290
12578
  transpose_column=None,
@@ -12546,7 +12834,7 @@ class DataFrame():
12546
12834
  arg_info_matrix.append(["exclude_nulls", exclude_nulls, True, (bool)])
12547
12835
  arg_info_matrix.append(["returns", returns, True, (str, list), True])
12548
12836
  arg_info_matrix.append(["all_columns", all_columns, True, (bool)])
12549
- for i in range(1, int(len(kwargs)/2) + 1):
12837
+ for i in range(1, int(len(kwargs) / 2) + 1):
12550
12838
  # Get the values of colN where N is in range(1, half the length of kwargs + 1).
12551
12839
  col = kwargs.get("col{}".format(i), None)
12552
12840
  col_val = kwargs.get("col{}_value".format(i), None)
@@ -12703,7 +12991,7 @@ class DataFrame():
12703
12991
  self._nodeid, self._metaexpr)
12704
12992
 
12705
12993
  # Generate the SELECT query.
12706
- select_query = 'SELECT * FROM {tbl_name} UNPIVOT{excl_null} ({for_cl}) {tmp_cl};'.\
12994
+ select_query = 'SELECT * FROM {tbl_name} UNPIVOT{excl_null} ({for_cl}) {tmp_cl};'. \
12707
12995
  format(tbl_name=self._table_name,
12708
12996
  excl_null="" if exclude_nulls else " INCLUDE NULLS",
12709
12997
  for_cl=for_clause,
@@ -12712,6 +13000,7 @@ class DataFrame():
12712
13000
  # Create the teradataml dataframe from SELECT query and return the same.
12713
13001
  return DataFrame.from_query(select_query)
12714
13002
 
13003
+ @collect_queryband(queryband="DF_plot")
12715
13004
  def plot(self, x, y, scale=None, kind="line", **kwargs):
12716
13005
  """
12717
13006
  DESCRIPTION:
@@ -13075,6 +13364,14 @@ class DataFrame():
13075
13364
  Applicable only for the wiggle and mesh plots.
13076
13365
  Types: int OR float
13077
13366
 
13367
+ ignore_nulls:
13368
+ Optional Argument.
13369
+ Specifies whether to delete rows with null values present in the 'x', 'y' and
13370
+ 'scale' parameters.
13371
+ Default Value: False
13372
+ Types: bool
13373
+
13374
+
13078
13375
  RAISES:
13079
13376
  TeradataMlException
13080
13377
 
@@ -13437,6 +13734,7 @@ class DataFrame():
13437
13734
  """
13438
13735
  return _Plot(x=x, y=y, scale=scale, kind=kind, **kwargs)
13439
13736
 
13737
+ @collect_queryband(queryband="DF_itertuples")
13440
13738
  def itertuples(self, name='Row', num_rows=None):
13441
13739
  """
13442
13740
  DESCRIPTION:
@@ -13513,6 +13811,287 @@ class DataFrame():
13513
13811
  for rec in cur:
13514
13812
  yield rec
13515
13813
 
13814
+ @collect_queryband(queryband="DF_replace")
13815
+ def replace(self, to_replace, value=None, subset=None):
13816
+ """
13817
+ DESCRIPTION:
13818
+ Function replaces every occurrence of "to_replace" with the "value"
13819
+ in the columns mentioned in "subset". When "subset" is not provided,
13820
+ the function performs the replacement in all columns.
13821
+
13822
+ PARAMETERS:
13823
+ to_replace:
13824
+ Required Argument.
13825
+ Specifies a ColumnExpression or a literal value that the function
13826
+ searches for in the column. Use a ColumnExpression when
13827
+ you want to match the condition based on a DataFrameColumn
13828
+ function; otherwise, use a literal.
13829
+ Note:
13830
+ Only ColumnExpressions generated from DataFrameColumn
13831
+ functions are supported. BinaryExpressions are not supported.
13832
+ Example: Consider a teradataml DataFrame with two columns COL1 and COL2.
13833
+ df.COL1.abs() is supported but df.COL1 == df.COL2 is not
13834
+ supported.
13835
+ Supported column types: CHAR, VARCHAR, FLOAT, INTEGER, DECIMAL
13836
+ Types: ColumnExpression OR int OR float OR str OR dict
13837
+
13838
+ value:
13839
+ Required argument when "to_replace" is not a dictionary. Optional otherwise.
13840
+ Specifies a ColumnExpression or a literal that replaces
13841
+ the "to_replace" in the column. Use ColumnExpression when
13842
+ you want to replace based on a DataFrameColumn function; otherwise,
13843
+ use a literal.
13844
+ Notes:
13845
+ * Argument is ignored if "to_replace" is a dictionary.
13846
+ * Only ColumnExpressions generated from DataFrameColumn
13847
+ functions are supported. BinaryExpressions are not supported.
13848
+ Example: Consider a teradataml DataFrame with two columns COL1 and COL2.
13849
+ df.COL1.abs() is supported but df.COL1 == df.COL2 is not
13850
+ supported.
13851
+ Supported column types: CHAR, VARCHAR, FLOAT, INTEGER, DECIMAL
13852
+ Types: ColumnExpression OR int OR float OR str
13853
+
13854
+ subset:
13855
+ Optional Argument.
13856
+ Specifies column(s) to consider for replacing the values.
13857
+ Types: ColumnExpression OR str OR list
13858
+
13859
+ RAISES:
13860
+ TeradataMlException
13861
+
13862
+ RETURNS:
13863
+ teradataml DataFrame
13864
+
13865
+ EXAMPLES:
13866
+ # Load the data to run the example.
13867
+ >>> load_example_data("dataframe", "admissions_train")
13868
+
13869
+ # Create a DataFrame on 'admissions_train' table.
13870
+ >>> df = DataFrame("admissions_train")
13871
+ >>> print(df)
13872
+ masters gpa stats programming admitted
13873
+ id
13874
+ 15 yes 4.00 Advanced Advanced 1
13875
+ 34 yes 3.85 Advanced Beginner 0
13876
+ 13 no 4.00 Advanced Novice 1
13877
+ 38 yes 2.65 Advanced Beginner 1
13878
+ 5 no 3.44 Novice Novice 0
13879
+ 40 yes 3.95 Novice Beginner 0
13880
+ 7 yes 2.33 Novice Novice 1
13881
+ 22 yes 3.46 Novice Beginner 0
13882
+ 26 yes 3.57 Advanced Advanced 1
13883
+ 17 no 3.83 Advanced Advanced 1
13884
+
13885
+ # Example 1: Replace the string 'Advanced' with 'Good' in columns 'stats'
13886
+ # and 'programming'.
13887
+ >>> res = df.replace("Advanced", "Good", subset=["stats", "programming"])
13888
+ >>> print(res)
13889
+ masters gpa stats programming admitted
13890
+ id
13891
+ 13 no 4.00 Good Novice 1
13892
+ 36 no 3.00 Good Novice 0
13893
+ 15 yes 4.00 Good Good 1
13894
+ 40 yes 3.95 Novice Beginner 0
13895
+ 22 yes 3.46 Novice Beginner 0
13896
+ 38 yes 2.65 Good Beginner 1
13897
+ 26 yes 3.57 Good Good 1
13898
+ 5 no 3.44 Novice Novice 0
13899
+ 7 yes 2.33 Novice Novice 1
13900
+ 19 yes 1.98 Good Good 0
13901
+
13902
+ # Example 2: Replace the string 'Advanced' with 'Good' and 'Beginner' with 'starter'
13903
+ # in columns 'stats' and 'programming'.
13904
+ >>> res = df.replace({"Advanced": "Good", "Beginner": "starter"}, subset=["stats", "programming"])
13905
+ >>> print(res)
13906
+ masters gpa stats programming admitted
13907
+ id
13908
+ 15 yes 4.00 Good Good 1
13909
+ 7 yes 2.33 Novice Novice 1
13910
+ 22 yes 3.46 Novice starter 0
13911
+ 17 no 3.83 Good Good 1
13912
+ 13 no 4.00 Good Novice 1
13913
+ 38 yes 2.65 Good starter 1
13914
+ 26 yes 3.57 Good Good 1
13915
+ 5 no 3.44 Novice Novice 0
13916
+ 34 yes 3.85 Good starter 0
13917
+ 40 yes 3.95 Novice starter 0
13918
+
13919
+ # Example 3: Append the string '_New' to the 'stats' column when values in
13920
+ # 'programming' and 'stats' are the same.
13921
+ >>> res = df.replace({df.programming: df.stats+"_New"}, subset=["stats"])
13922
+ >>> print(res)
13923
+ masters gpa stats programming admitted
13924
+ id
13925
+ 15 yes 4.00 Advanced_New Advanced 1
13926
+ 34 yes 3.85 Advanced Beginner 0
13927
+ 13 no 4.00 Advanced Novice 1
13928
+ 38 yes 2.65 Advanced Beginner 1
13929
+ 5 no 3.44 Novice_New Novice 0
13930
+ 40 yes 3.95 Novice Beginner 0
13931
+ 7 yes 2.33 Novice_New Novice 1
13932
+ 22 yes 3.46 Novice Beginner 0
13933
+ 26 yes 3.57 Advanced_New Advanced 1
13934
+ 17 no 3.83 Advanced_New Advanced 1
13935
+
13936
+ # Example 4: Round the values of 'gpa' to the nearest integer.
13937
+ >>> res = df.replace({df.gpa: df.gpa.round(0)}, subset=["gpa"])
13938
+ >>> print(res)
13939
+ masters gpa stats programming admitted
13940
+ id
13941
+ 15 yes 4.0 Advanced Advanced 1
13942
+ 7 yes 2.0 Novice Novice 1
13943
+ 22 yes 3.0 Novice Beginner 0
13944
+ 17 no 4.0 Advanced Advanced 1
13945
+ 13 no 4.0 Advanced Novice 1
13946
+ 38 yes 3.0 Advanced Beginner 1
13947
+ 26 yes 4.0 Advanced Advanced 1
13948
+ 5 no 3.0 Novice Novice 0
13949
+ 34 yes 4.0 Advanced Beginner 0
13950
+ 40 yes 4.0 Novice Beginner 0
13951
+
13952
+ # Example 5: Replace the value of 'masters' with 1 if the value is 'yes'
13953
+ # and with 0 if the value is 'no'.
13954
+ >>> res = df.replace({'yes': 1, 'no': 0}, subset=["masters"])
13955
+ >>> print(res)
13956
+ masters gpa stats programming admitted
13957
+ id
13958
+ 15 1 4.00 Advanced Advanced 1
13959
+ 7 1 2.33 Novice Novice 1
13960
+ 22 1 3.46 Novice Beginner 0
13961
+ 17 0 3.83 Advanced Advanced 1
13962
+ 13 0 4.00 Advanced Novice 1
13963
+ 38 1 2.65 Advanced Beginner 1
13964
+ 26 1 3.57 Advanced Advanced 1
13965
+ 5 0 3.44 Novice Novice 0
13966
+ 34 1 3.85 Advanced Beginner 0
13967
+ 40 1 3.95 Novice Beginner 0
13968
+ """
13969
+ _validation_matrix = []
13970
+ _validation_matrix.append(["to_replace", to_replace, True, (int, float, str, dict, ColumnExpression)])
13971
+ _validation_matrix.append(["value", value, False, (int, float, str, dict, type(None), ColumnExpression)])
13972
+ _validation_matrix.append(["subset", subset, False, (str, list, type(None))])
13973
+ _Validators._validate_function_arguments(_validation_matrix)
13974
+
13975
+ if subset is None:
13976
+ subset = self.columns
13977
+ else:
13978
+ subset = [col.name if not isinstance(col, str) else col for col in UtilFuncs._as_list(subset)]
13979
+
13980
+ if not isinstance(to_replace, dict):
13981
+ to_replace = {to_replace: value}
13982
+
13983
+ new_columns = {}
13984
+ for column in self.columns:
13985
+ new_columns[column] = self[column].replace(to_replace) if column in subset else self[column]
13986
+ return self.assign(**new_columns, drop_columns=True).select(self.columns)
13987
+
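As the implementation above shows, a scalar "to_replace" is folded into a single-entry dictionary ({to_replace: value}) before the column-wise replacement is applied, so the two calls below against admissions_train are equivalent. This is a sketch based only on that code path.

    >>> # Scalar form: "value" supplies the replacement.
    >>> res1 = df.replace("Advanced", "Good", subset=["stats"])
    >>> # Dictionary form that the scalar call is normalised into; the
    >>> # "value" argument would be ignored here.
    >>> res2 = df.replace({"Advanced": "Good"}, subset=["stats"])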
13988
+ @collect_queryband(queryband="DF_cube")
13989
+ def cube(self, columns):
13990
+ """
13991
+ DESCRIPTION:
13992
+ cube() function creates a multi-dimensional cube for the DataFrame
13993
+ using the specified column(s), thereby running aggregates on
13994
+ it to produce aggregations across the different dimensions.
13995
+
13996
+
13997
+ PARAMETERS:
13998
+ columns:
13999
+ Required Argument.
14000
+ Specifies the name(s) of input teradataml DataFrame column(s).
14001
+ Types: str OR list of str(s)
14002
+
14003
+ RETURNS:
14004
+ teradataml DataFrameGroupBy
14005
+
14006
+ RAISES:
14007
+ TeradataMlException
14008
+
14009
+ EXAMPLES:
14010
+ # Example 1: Analyze the data by grouping it into the 'masters' and 'stats' dimensions.
14011
+ >>> load_example_data("dataframe","admissions_train")
14012
+ >>> df = DataFrame("admissions_train")
14013
+ >>> df1 = df.cube(["masters", "stats"]).sum()
14014
+ >>> df1
14015
+ masters stats sum_id sum_gpa sum_admitted
14016
+ 0 no Beginner 8 3.60 1
14017
+ 1 None Advanced 555 84.21 16
14018
+ 2 None Beginner 21 18.31 3
14019
+ 3 yes Beginner 13 14.71 2
14020
+ 4 None None 820 141.67 26
14021
+ 5 yes Advanced 366 49.26 7
14022
+ 6 no None 343 63.96 16
14023
+ 7 None Novice 244 39.15 7
14024
+ 8 no Advanced 189 34.95 9
14025
+ 9 yes Novice 98 13.74 1
14026
+
14027
+ """
14028
+ # Validate columns argument.
14029
+ arg_info_matrix = []
14030
+ arg_info_matrix.append(["columns", columns, False, (str, list), True])
14031
+
14032
+ # Validate argument types
14033
+ _Validators._validate_function_arguments(arg_info_matrix)
14034
+
14035
+ # Checking each element in passed columns to be valid column in dataframe
14036
+ _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
14037
+
14038
+ # Query generation of the cube API is the same as that of group by.
14039
+ # Only 'cube' is concatenated with 'group by' clause.
14040
+ return self.groupby(columns, option="cube")
14041
+
14042
+ @collect_queryband(queryband="DF_rollup")
14043
+ def rollup(self, columns):
14044
+ """
14045
+ DESCRIPTION:
14046
+ rollup() function creates a multi-dimensional rollup for the DataFrame
14047
+ using the specified column(s), thereby running aggregates on
14048
+ it to produce aggregations across the different dimensions.
14049
+
14050
+
14051
+ PARAMETERS:
14052
+ columns:
14053
+ Required Argument.
14054
+ Specifies the name(s) of input teradataml DataFrame column(s).
14055
+ Types: str OR list of str(s)
14056
+
14057
+ RETURNS:
14058
+ teradataml DataFrameGroupBy
14059
+
14060
+ RAISES:
14061
+ TeradataMlException
14062
+
14063
+ EXAMPLES:
14064
+ # Example 1: Analyze the data by grouping it into the 'masters' and 'stats' dimensions.
14065
+ >>> load_example_data("dataframe","admissions_train")
14066
+ >>> df = DataFrame("admissions_train")
14067
+ >>> df1 = df.rollup(["masters", "stats"]).sum()
14068
+ >>> df1
14069
+ masters stats sum_id sum_gpa sum_admitted
14070
+ 0 no None 343 63.96 16
14071
+ 1 yes None 477 77.71 10
14072
+ 2 None None 820 141.67 26
14073
+ 3 no Novice 146 25.41 6
14074
+ 4 no Beginner 8 3.60 1
14075
+ 5 yes Novice 98 13.74 1
14076
+ 6 yes Beginner 13 14.71 2
14077
+ 7 yes Advanced 366 49.26 7
14078
+ 8 no Advanced 189 34.95 9
14079
+
14080
+ """
14081
+ # Validate columns argument.
14082
+ arg_info_matrix = []
14083
+ arg_info_matrix.append(["columns", columns, False, (str, list), True])
14084
+
14085
+ # Validate argument types
14086
+ _Validators._validate_function_arguments(arg_info_matrix)
14087
+
14088
+ # Checking each element in passed columns to be valid column in dataframe
14089
+ _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
14090
+
14091
+ # Query generation of cube API is same as the group by.
14092
+ # Only 'rollup' is concatenated with 'group by' clause.
14093
+ return self.groupby(columns, option="rollup")
14094
+
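Both cube() and rollup() above are thin wrappers over groupby(): as the comments in their bodies note, only the 'cube' or 'rollup' keyword is appended to the GROUP BY clause. The sketch below restates that equivalence with the admissions_train columns from the examples; the "option" keyword is taken from the internal calls shown above and may not be part of the public groupby() signature.

    >>> # Each pair produces the same grouped aggregation.
    >>> df.cube(["masters", "stats"]).sum()
    >>> df.groupby(["masters", "stats"], option="cube").sum()
    >>> df.rollup(["masters", "stats"]).sum()
    >>> df.groupby(["masters", "stats"], option="rollup").sum()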
13516
14095
 
13517
14096
  class DataFrameGroupBy(DataFrame):
13518
14097
  """
@@ -13520,7 +14099,8 @@ class DataFrameGroupBy(DataFrame):
13520
14099
  Updates AED node for DataFrame groupby object.
13521
14100
 
13522
14101
  """
13523
- def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupbyexpr, column_list):
14102
+
14103
+ def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupbyexpr, column_list, option=None):
13524
14104
  """
13525
14105
  init() method for DataFrameGroupBy.
13526
14106
 
@@ -13555,11 +14135,17 @@ class DataFrameGroupBy(DataFrame):
13555
14135
  Specifies list of columns provided by user to be part group by clause.
13556
14136
  Types: str or List of Strings
13557
14137
 
14138
+ option:
14139
+ Optional Argument.
14140
+ Specifies the groupby option.
14141
+ Permitted Values: "CUBE", "ROLLUP", None
14142
+ Types: str or NoneType
14143
+
13558
14144
  RETURNS:
13559
14145
  teradataml DataFrameGroupBy instance
13560
14146
  """
13561
14147
  super(DataFrameGroupBy, self).__init__()
13562
- self._nodeid = self._aed_utils._aed_groupby(nodeid, groupbyexpr)
14148
+ self._nodeid = self._aed_utils._aed_groupby(nodeid, groupbyexpr, option)
13563
14149
  self._metaexpr = metaexpr
13564
14150
  self._column_names_and_types = column_names_and_types
13565
14151
  self._columns = columns
@@ -13583,7 +14169,7 @@ class DataFrameGroupBy(DataFrame):
13583
14169
  allowed_types = self._get_assign_allowed_types()
13584
14170
  """
13585
14171
  from sqlalchemy.sql.functions import Function
13586
- return (type(None), int, float, str, decimal.Decimal, Function, ColumnExpression)
14172
+ return (type(None), int, float, str, decimal.Decimal, Function, ColumnExpression, ClauseElement)
13587
14173
 
13588
14174
  def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
13589
14175
  """
@@ -13675,8 +14261,10 @@ class DataFrameGroupByTime(DataFrame):
13675
14261
  Updates AED node for DataFrame GROUP BY TIME object.
13676
14262
 
13677
14263
  """
13678
- def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupby_value_expr, column_list, timebucket_duration,
13679
- value_expression = None, timecode_column = None, sequence_column = None, fill = None):
14264
+
14265
+ def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupby_value_expr, column_list,
14266
+ timebucket_duration,
14267
+ value_expression=None, timecode_column=None, sequence_column=None, fill=None):
13680
14268
  """
13681
14269
  init() method for DataFrameGroupByTime.
13682
14270
 
@@ -13762,10 +14350,10 @@ class DataFrameGroupByTime(DataFrame):
13762
14350
  timecode_column = "" if timecode_column is None else UtilFuncs._process_for_teradata_keyword(timecode_column)
13763
14351
  sequence_column = "" if sequence_column is None else UtilFuncs._process_for_teradata_keyword(sequence_column)
13764
14352
 
13765
- self._nodeid = self._aed_utils._aed_groupby_time(nodeid = nodeid, timebucket_duration = timebucket_duration,
13766
- value_expression = groupby_value_expr,
13767
- using_timecode = timecode_column, seqno_col = sequence_column,
13768
- fill = fill)
14353
+ self._nodeid = self._aed_utils._aed_groupby_time(nodeid=nodeid, timebucket_duration=timebucket_duration,
14354
+ value_expression=groupby_value_expr,
14355
+ using_timecode=timecode_column, seqno_col=sequence_column,
14356
+ fill=fill)
13769
14357
 
13770
14358
  # MetaExpression is same as that of parent.
13771
14359
  self._metaexpr = metaexpr
@@ -13788,6 +14376,7 @@ class DataFrameGroupByTime(DataFrame):
13788
14376
  self._sequence_column = sequence_column
13789
14377
  self._fill = fill
13790
14378
 
14379
+ @collect_queryband(queryband="DF_bottom")
13791
14380
  def bottom(self, number_of_values_to_column, with_ties=False):
13792
14381
  """
13793
14382
  DESCRIPTION:
@@ -14059,6 +14648,7 @@ class DataFrameGroupByTime(DataFrame):
14059
14648
 
14060
14649
  return self.__process_time_series_aggregate_with_multi_input_arguments(number_of_values_to_column, operation)
14061
14650
 
14651
+ @collect_queryband(queryband="DF_deltaT")
14062
14652
  def delta_t(self, start_condition, end_condition):
14063
14653
  """
14064
14654
  DESCRIPTION:
@@ -14310,12 +14900,14 @@ class DataFrameGroupByTime(DataFrame):
14310
14900
  operation = "delta_t"
14311
14901
 
14312
14902
  kwargs = {
14313
- "start_condition": start_condition.compile() if isinstance(start_condition, ColumnExpression) else start_condition,
14903
+ "start_condition": start_condition.compile() if isinstance(start_condition,
14904
+ ColumnExpression) else start_condition,
14314
14905
  "end_condition": end_condition.compile() if isinstance(end_condition, ColumnExpression) else end_condition
14315
14906
  }
14316
14907
  return self._get_dataframe_aggregate(operation=operation, **kwargs)
14317
14908
 
14318
- def first(self, columns = None):
14909
+ @collect_queryband(queryband="DF_first")
14910
+ def first(self, columns=None):
14319
14911
  """
14320
14912
  DESCRIPTION:
14321
14913
  Returns the oldest value, determined by the timecode, for each group. FIRST is a single-threaded function.
@@ -14507,8 +15099,9 @@ class DataFrameGroupByTime(DataFrame):
14507
15099
  # Checking each element in passed columns to be valid column in dataframe
14508
15100
  _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
14509
15101
 
14510
- return self._get_dataframe_aggregate(operation = 'first', columns=columns)
15102
+ return self._get_dataframe_aggregate(operation='first', columns=columns)
14511
15103
 
15104
+ @collect_queryband(queryband="DF_last")
14512
15105
  def last(self, columns=None):
14513
15106
  """
14514
15107
  DESCRIPTION:
@@ -14703,8 +15296,9 @@ class DataFrameGroupByTime(DataFrame):
14703
15296
  # Checking each element in passed columns to be valid column in dataframe
14704
15297
  _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
14705
15298
 
14706
- return self._get_dataframe_aggregate(operation = 'last', columns=columns)
15299
+ return self._get_dataframe_aggregate(operation='last', columns=columns)
14707
15300
 
15301
+ @collect_queryband(queryband="DF_mad")
14708
15302
  def mad(self, constant_multiplier_columns=None):
14709
15303
  """
14710
15304
  DESCRIPTION:
@@ -14912,6 +15506,7 @@ class DataFrameGroupByTime(DataFrame):
14912
15506
 
14913
15507
  return self.__process_time_series_aggregate_with_multi_input_arguments(constant_multiplier_columns, 'mad')
14914
15508
 
15509
+ @collect_queryband(queryband="DF_mode")
14915
15510
  def mode(self):
14916
15511
  """
14917
15512
  DESCRIPTION:
@@ -15066,6 +15661,7 @@ class DataFrameGroupByTime(DataFrame):
15066
15661
  """
15067
15662
  return self._get_dataframe_aggregate(operation='mode')
15068
15663
 
15664
+ @collect_queryband(queryband="DF_percentile")
15069
15665
  def percentile(self, percentile, distinct=False, interpolation="LINEAR"):
15070
15666
  """
15071
15667
  DESCRIPTION:
@@ -15330,6 +15926,7 @@ class DataFrameGroupByTime(DataFrame):
15330
15926
  return self._get_dataframe_aggregate(operation='percentile', percentile=percentile,
15331
15927
  distinct=distinct, interpolation=interpolation)
15332
15928
 
15929
+ @collect_queryband(queryband="DF_top")
15333
15930
  def top(self, number_of_values_to_column, with_ties=False):
15334
15931
  """
15335
15932
  DESCRIPTION:
@@ -15745,7 +16342,8 @@ class DataFrameGroupByTime(DataFrame):
15745
16342
  remaining_columns = list(set(self.columns) - set(columns_processed))
15746
16343
  unsupported_types = _Dtypes._get_unsupported_data_types_for_aggregate_operations(operation)
15747
16344
  for column in remaining_columns:
15748
- if not isinstance(self._td_column_names_and_sqlalchemy_types[column.lower()], tuple(unsupported_types)):
16345
+ if not isinstance(self._td_column_names_and_sqlalchemy_types[column.lower()],
16346
+ tuple(unsupported_types)):
15749
16347
  # We should not involve columns used in value expression of GROUP BY TIME clause as well.
15750
16348
  if column not in self._value_expression:
15751
16349
  default_constant_for_columns.append(column)
@@ -15964,9 +16562,9 @@ class _TDUAF(DataFrame):
15964
16562
  self._awu_matrix.append(["id_sequence", self._id_sequence, True, (str, list), True])
15965
16563
  self._awu_matrix.append(["payload_field", self._payload_field, not self._is_payload_required(), (str, list), True])
15966
16564
  self._awu_matrix.append(["payload_content", self._payload_content, not self._is_payload_required(), str, True,
15967
- ["REAL", "COMPLEX", "AMPL_PHASE", "AMPL_PHASE_RADIANS", "AMPL_PHASE_DEGREES",
15968
- "MULTIVAR_REAL", "MULTIVAR_COMPLEX", "MULTIVAR_ANYTYPE", "MULTIVAR_AMPL_PHASE",
15969
- "MULTIVAR_AMPL_PHASE_RADIANS", "MULTIVAR_AMPL_PHASE_DEGREES"]])
16565
+ ["REAL", "COMPLEX", "AMPL_PHASE", "AMPL_PHASE_RADIANS", "AMPL_PHASE_DEGREES",
16566
+ "MULTIVAR_REAL", "MULTIVAR_COMPLEX", "MULTIVAR_ANYTYPE", "MULTIVAR_AMPL_PHASE",
16567
+ "MULTIVAR_AMPL_PHASE_RADIANS", "MULTIVAR_AMPL_PHASE_DEGREES"]])
15970
16568
  self._awu_matrix.append(["layer", self._layer, True, str, True])
15971
16569
 
15972
16570
  # store the columns to check against the DataFrame.
@@ -16061,7 +16659,7 @@ class _TDUAF(DataFrame):
16061
16659
  # Declare a function to return a generator object. Note that, this should be a function because,
16062
16660
  # generator object exhausts after the first usage. So, if it is a regular variable,
16063
16661
  # _non_parameterised_sql will not have any data to consume from generator object.
16064
- get_sql_clauses = lambda : ("{}{}".format(" "*4, c) for c in sql_clauses)
16662
+ get_sql_clauses = lambda: ("{}{}".format(" " * 4, c) for c in sql_clauses)
16065
16663
 
16066
16664
  self._parameterised_sql = self._spec_header.format(
16067
16665
  "\n" + ", \n".join(get_sql_clauses()).format(*(["?"]*len(sql_values)))), sql_values
@@ -16345,7 +16943,7 @@ class TDSeries(_TDUAF):
16345
16943
 
16346
16944
  self._additional_spec["SERIES_ID ({})"] = ", ".join(UtilFuncs._as_list(self._id))
16347
16945
 
16348
- def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns = None):
16946
+ def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
16349
16947
  """
16350
16948
  DESCRIPTION:
16351
16949
  Function to call the _create_validate_dataframe_from_node which will create
@@ -16527,7 +17125,7 @@ class TDAnalyticResult(_TDUAF):
16527
17125
 
16528
17126
  self._spec_header = "ART_SPEC({})"
16529
17127
 
16530
- def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns = None):
17128
+ def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
16531
17129
  """
16532
17130
  DESCRIPTION:
16533
17131
  Function to call the _create_validate_dataframe_from_node which will create
@@ -16716,7 +17314,8 @@ class TDMatrix(_TDUAF):
16716
17314
  self._column_index = column_index
16717
17315
  self._column_index_style = column_index_style
16718
17316
 
16719
- super().__init__(data=data, id=id, row_index=row_index, row_index_style=row_index_style, id_sequence=id_sequence,
17317
+ super().__init__(data=data, id=id, row_index=row_index, row_index_style=row_index_style,
17318
+ id_sequence=id_sequence,
16720
17319
  payload_field=payload_field, payload_content=payload_content, layer=layer)
16721
17320
  self._awu_matrix.append(["column_index", self._column_index, False, (str), True])
16722
17321
  self._awu_matrix.append(
@@ -16733,8 +17332,7 @@ class TDMatrix(_TDUAF):
16733
17332
 
16734
17333
  self._additional_spec["MATRIX_ID ({})"] = ", ".join(UtilFuncs._as_list(self._id))
16735
17334
 
16736
-
16737
- def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns = None):
17335
+ def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
16738
17336
  """
16739
17337
  DESCRIPTION:
16740
17338
  Function to call the _create_validate_dataframe_from_node which will create
@@ -16793,6 +17391,7 @@ class TDGenSeries():
16793
17391
  """
16794
17392
  TDGenSeries class for UAF Functions.
16795
17393
  """
17394
+
16796
17395
  def __init__(self, instances, data_types, start, offset, num_entries):
16797
17396
  """
16798
17397
  Generate a series to be passed to a UAF function rather than using a
@@ -16849,7 +17448,7 @@ class TDGenSeries():
16849
17448
  # Create a TDGenSeries object to be passed as input to UAF functions.
16850
17449
  >>> series = TDGenSeries(instances = {"BuoyID": 3}, data_types = INTEGER(), start=0, offset=1, num_entries=5)
16851
17450
  """
16852
-
17451
+
16853
17452
  self._instances = instances
16854
17453
  self._data_types = data_types
16855
17454
  self._start = start
@@ -16943,4 +17542,4 @@ class TDGenSeries():
16943
17542
  if not self._parameterised_sql:
16944
17543
  self._generate_spec()
16945
17544
 
16946
- return self._parameterised_sql
17545
+ return self._parameterised_sql