teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (263)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +183 -0
  4. teradataml/__init__.py +6 -3
  5. teradataml/_version.py +2 -2
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +275 -40
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +1 -0
  11. teradataml/analytics/json_parser/utils.py +17 -21
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +10 -2
  15. teradataml/analytics/table_operator/__init__.py +3 -2
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +62 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1553 -319
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +276 -319
  22. teradataml/automl/data_transformation.py +163 -81
  23. teradataml/automl/feature_engineering.py +402 -239
  24. teradataml/automl/feature_exploration.py +9 -2
  25. teradataml/automl/model_evaluation.py +48 -51
  26. teradataml/automl/model_training.py +291 -189
  27. teradataml/catalog/byom.py +8 -8
  28. teradataml/catalog/model_cataloging_utils.py +1 -1
  29. teradataml/clients/auth_client.py +133 -0
  30. teradataml/clients/pkce_client.py +1 -1
  31. teradataml/common/aed_utils.py +3 -2
  32. teradataml/common/constants.py +48 -6
  33. teradataml/common/deprecations.py +13 -7
  34. teradataml/common/garbagecollector.py +156 -120
  35. teradataml/common/messagecodes.py +6 -1
  36. teradataml/common/messages.py +3 -1
  37. teradataml/common/sqlbundle.py +1 -1
  38. teradataml/common/utils.py +103 -11
  39. teradataml/common/wrapper_utils.py +1 -1
  40. teradataml/context/context.py +121 -31
  41. teradataml/data/advertising.csv +201 -0
  42. teradataml/data/bank_marketing.csv +11163 -0
  43. teradataml/data/bike_sharing.csv +732 -0
  44. teradataml/data/boston2cols.csv +721 -0
  45. teradataml/data/breast_cancer.csv +570 -0
  46. teradataml/data/complaints_test_tokenized.csv +353 -0
  47. teradataml/data/complaints_tokens_model.csv +348 -0
  48. teradataml/data/covid_confirm_sd.csv +83 -0
  49. teradataml/data/customer_segmentation_test.csv +2628 -0
  50. teradataml/data/customer_segmentation_train.csv +8069 -0
  51. teradataml/data/dataframe_example.json +10 -0
  52. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  53. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  54. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  55. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  56. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  57. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  58. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  59. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  60. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  61. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  62. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  63. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  64. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  65. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  66. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  67. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  68. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  69. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  70. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  71. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  72. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  73. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  74. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  75. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  76. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  77. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  78. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  79. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  80. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  81. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  82. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  83. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  84. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  85. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  86. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  87. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  88. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  89. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  90. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  91. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  92. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  93. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  94. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  95. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  96. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  97. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  98. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  99. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  100. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  101. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  102. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  103. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  104. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  105. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  106. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  107. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  108. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  109. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  110. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  111. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  112. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  113. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  114. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  115. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  116. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  117. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  118. teradataml/data/dwt2d_dataTable.csv +65 -0
  119. teradataml/data/dwt_dataTable.csv +8 -0
  120. teradataml/data/dwt_filterTable.csv +3 -0
  121. teradataml/data/finance_data4.csv +13 -0
  122. teradataml/data/glm_example.json +28 -1
  123. teradataml/data/grocery_transaction.csv +19 -0
  124. teradataml/data/housing_train_segment.csv +201 -0
  125. teradataml/data/idwt2d_dataTable.csv +5 -0
  126. teradataml/data/idwt_dataTable.csv +8 -0
  127. teradataml/data/idwt_filterTable.csv +3 -0
  128. teradataml/data/insect2Cols.csv +61 -0
  129. teradataml/data/interval_data.csv +5 -0
  130. teradataml/data/jsons/paired_functions.json +14 -0
  131. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  132. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  133. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  134. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  135. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  136. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  137. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  138. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  139. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  140. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  141. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  142. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  143. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  144. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  145. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  146. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  147. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  148. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  149. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  150. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  151. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  152. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  153. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  154. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  155. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  156. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  157. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  158. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  159. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  160. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  161. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  162. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  163. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  164. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  165. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  166. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  167. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  168. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  169. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  170. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  171. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  172. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  173. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  174. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  175. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  176. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  177. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  178. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  179. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  180. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  181. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  182. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  183. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  184. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  185. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  186. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  187. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  188. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  189. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  190. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  191. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  192. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  193. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  194. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  195. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  196. teradataml/data/kmeans_example.json +5 -0
  197. teradataml/data/kmeans_table.csv +10 -0
  198. teradataml/data/load_example_data.py +8 -2
  199. teradataml/data/naivebayestextclassifier_example.json +1 -1
  200. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  201. teradataml/data/onehot_encoder_train.csv +4 -0
  202. teradataml/data/openml_example.json +29 -0
  203. teradataml/data/peppers.png +0 -0
  204. teradataml/data/real_values.csv +14 -0
  205. teradataml/data/sax_example.json +8 -0
  206. teradataml/data/scale_attributes.csv +3 -0
  207. teradataml/data/scale_example.json +52 -1
  208. teradataml/data/scale_input_part_sparse.csv +31 -0
  209. teradataml/data/scale_input_partitioned.csv +16 -0
  210. teradataml/data/scale_input_sparse.csv +11 -0
  211. teradataml/data/scale_parameters.csv +3 -0
  212. teradataml/data/scripts/deploy_script.py +21 -2
  213. teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
  214. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
  215. teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
  216. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  217. teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
  218. teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
  219. teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
  220. teradataml/data/star_pivot.csv +8 -0
  221. teradataml/data/templates/open_source_ml.json +2 -1
  222. teradataml/data/teradataml_example.json +97 -1
  223. teradataml/data/timestamp_data.csv +4 -0
  224. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  225. teradataml/data/uaf_example.json +55 -1
  226. teradataml/data/unpivot_example.json +15 -0
  227. teradataml/data/url_data.csv +9 -0
  228. teradataml/data/windowdfft.csv +16 -0
  229. teradataml/data/ztest_example.json +16 -0
  230. teradataml/dataframe/copy_to.py +9 -4
  231. teradataml/dataframe/data_transfer.py +125 -64
  232. teradataml/dataframe/dataframe.py +575 -57
  233. teradataml/dataframe/dataframe_utils.py +47 -9
  234. teradataml/dataframe/fastload.py +273 -90
  235. teradataml/dataframe/functions.py +339 -0
  236. teradataml/dataframe/row.py +160 -0
  237. teradataml/dataframe/setop.py +2 -2
  238. teradataml/dataframe/sql.py +740 -18
  239. teradataml/dataframe/window.py +1 -1
  240. teradataml/dbutils/dbutils.py +324 -18
  241. teradataml/geospatial/geodataframe.py +1 -1
  242. teradataml/geospatial/geodataframecolumn.py +1 -1
  243. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  244. teradataml/lib/aed_0_1.dll +0 -0
  245. teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
  246. teradataml/options/__init__.py +16 -5
  247. teradataml/options/configure.py +39 -6
  248. teradataml/options/display.py +2 -2
  249. teradataml/plot/axis.py +4 -4
  250. teradataml/scriptmgmt/UserEnv.py +26 -19
  251. teradataml/scriptmgmt/lls_utils.py +120 -16
  252. teradataml/table_operators/Script.py +4 -5
  253. teradataml/table_operators/TableOperator.py +160 -26
  254. teradataml/table_operators/table_operator_util.py +88 -41
  255. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  256. teradataml/telemetry_utils/__init__.py +0 -0
  257. teradataml/telemetry_utils/queryband.py +52 -0
  258. teradataml/utils/validators.py +41 -3
  259. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
  260. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
  261. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  262. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  263. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -24,6 +24,7 @@ import teradataml.context.context as tdmlctx
 from collections import OrderedDict, namedtuple
 from sqlalchemy.sql import ClauseElement
 from teradataml import execute_sql
+from teradataml import GarbageCollector
 from teradataml.dataframe.sql import _MetaExpression
 from teradataml.dataframe.sql_interfaces import ColumnExpression
 from teradataml.dataframe.sql_functions import case
@@ -41,6 +42,7 @@ from teradataml.dataframe.indexer import _LocationIndexer
 from teradataml.common.aed_utils import AedUtils
 from teradataml.options.display import display
 from teradataml.dataframe.copy_to import copy_to_sql
+from teradataml.dataframe.row import _Row
 from teradataml.dataframe.setop import concat
 from teradataml.plot.plot import _Plot
 from teradataml.scriptmgmt.UserEnv import UserEnv
@@ -52,7 +54,9 @@ from teradatasql import OperationalError
 from teradataml.dataframe.window import Window
 from teradataml.dataframe.data_transfer import _DataTransferUtils
 from teradataml.common.bulk_exposed_utils import _validate_unimplemented_function
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
+from teradataml.options.configure import configure
+from teradataml.utils.internal_buffer import _InternalBuffer
 
 # TODO use logger when available on master branch
 # logger = teradatapylog.getLogger()
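
Note on the import change above: collect_queryband now ships inside teradataml itself (the new teradataml/telemetry_utils/queryband.py module, +52 lines per the file list) instead of coming from teradatasqlalchemy. The decorator is applied throughout this file, e.g. @collect_queryband(queryband="DF_fillna"). A minimal sketch of the decorator-factory shape it follows; the body here is an illustrative assumption, not the shipped implementation:

    from functools import wraps

    def collect_queryband(queryband=None):
        """Decorator factory: label the wrapped DataFrame API with a query band."""
        def decorator(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                # The shipped version would record `queryband` (e.g. "DF_fillna")
                # into the session QUERY_BAND before delegating to the API.
                return func(*args, **kwargs)
            return wrapper
        return decorator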
@@ -150,6 +154,11 @@ class DataFrame():
         # This attribute added to add setter for columns property,
         # it is required when setting columns from groupby
         self._columns = None
+        # This attribute stores the internal AED query and avoids multiple
+        # calls to AED utility function aed_show_query().
+        self._aed_query = None
+        # This attribute stores the type of query stored in self._aed_query.
+        self._is_full_query = None
 
         # Property to determine if table is an ART table or not.
         self._is_art = None
@@ -417,6 +426,130 @@ class DataFrame():
 
         return df
 
+    def create_temp_view(self, name):
+        """
+        DESCRIPTION:
+            Creates a temporary view for session on the DataFrame.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the name of the temporary view.
+                Type: str
+
+        RETURNS:
+            None
+
+        RAISES:
+            OperationalError (When view already exists).
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            38     yes  2.65  Advanced    Beginner         1
+            7      yes  2.33    Novice      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            32     yes  3.46  Advanced    Beginner         0
+            11      no  3.13  Advanced    Advanced         1
+            15     yes  4.00  Advanced    Advanced         1
+            36      no  3.00  Advanced      Novice         0
+
+            # Example 1: Create view 'new_admissions'.
+            >>> df.create_temp_view("new_admissions")
+            >>> new_df = DataFrame("new_admissions")
+            >>> new_df
+               masters   gpa     stats programming  admitted
+            id
+            38     yes  2.65  Advanced    Beginner         1
+            7      yes  2.33    Novice      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            32     yes  3.46  Advanced    Beginner         0
+            11      no  3.13  Advanced    Advanced         1
+            15     yes  4.00  Advanced    Advanced         1
+            36      no  3.00  Advanced      Novice         0
+        """
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["name", name, False, (str), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        GarbageCollector._add_to_garbagecollector(name, TeradataConstants.TERADATA_VIEW)
+        UtilFuncs._create_view(name, self.show_query())
+
+    def materialize(self):
+        """
+        DESCRIPTION:
+            Method to materialize teradataml DataFrame into a database object.
+            Notes:
+                * DataFrames are materialized in either view/table/volatile table,
+                  which is decided and taken care of by teradataml.
+                * If user wants to materialize object into specific database object
+                  such as table/volatile table, use 'to_sql()' or 'copy_to_sql()' or
+                  'fastload()' functions.
+                * Materialized object is garbage collected at the end of the session.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            DataFrame
+
+        EXAMPLES:
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            13      no  4.00  Advanced      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            5       no  3.44    Novice      Novice         0
+            19     yes  1.98  Advanced    Advanced         0
+            15     yes  4.00  Advanced    Advanced         1
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            36      no  3.00  Advanced      Novice         0
+            38     yes  2.65  Advanced    Beginner         1
+
+            # Example 1: Perform operations on teradataml DataFrame
+            #            and materialize it in a database object.
+            >>> df2 = df.get([["id", "masters", "gpa"]])
+
+            # Initially table_name will be None.
+            >>> df2._table_name
+
+            >>> df2.materialize()
+               masters   gpa
+            id
+            15     yes  4.00
+            7      yes  2.33
+            22     yes  3.46
+            17      no  3.83
+            13      no  4.00
+            38     yes  2.65
+            26     yes  3.57
+            5       no  3.44
+            34     yes  3.85
+            40     yes  3.95
+
+            # After materialize(), view name will be assigned.
+            >>> df2._table_name
+            '"ALICE"."ml__select__172077355985236"'
+            >>>
+        """
+        self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
+        return self
+
     @collect_queryband(queryband="DF_fillna")
     def fillna(self, value=None, columns=None, literal_value=False):
         """
@@ -5017,7 +5150,7 @@ class DataFrame():
                     'median', 'var'
 
                 Acceptable formats for function(s) are
-                string, dictionary or list of strings/functions.
+                string, dictionary, list of strings/functions/ColumnExpression or ColumnExpression.
 
                 Accepted combinations are:
                 1. String function name
@@ -5025,12 +5158,57 @@ class DataFrame():
                 3. Dictionary containing column name as key and
                    aggregate function name (string or list of
                    strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.
+
+                Note:
+                    * The names of the output columns are generated based on aggregate functions and column names.
+                      For Example,
+                      1. "func" passed as a string.
+                         >>> df.agg('mean')
+                         Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
+                         After the above operation, the output column names are:
+                         mean_employee_no, mean_marks, mean_dob, mean_joined_date
+
+                      2. "func" passed as a list of string functions.
+                         >>> df.agg(['min', 'sum'])
+                         Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
+                         After the above operation, the output column names are:
+                         min_employee_no, sum_employee_no, min_first_name, min_marks, sum_marks, min_dob, min_joined_date
+
+                      3. "func" passed as a dictionary containing column name as key and aggregate function name as value.
+                         >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min']})
+                         Output column names after the above operation are:
+                         min_employee_no, sum_employee_no, var_employee_no, min_first_name
+
+                      4. "func" passed as a ColumnExpression built using the aggregate functions.
+                         >>> df.agg(df.first_name.count())
+                         Output column name after the above operation is:
+                         count(first_name)
+
+                      5. "func" passed as a list of ColumnExpression built using the aggregate functions.
+                         >>> df.agg([df.employee_no.min(), df.first_name.count()])
+                         Output column names after the above operation are:
+                         min(employee_no), count(first_name)
+
+                    * On ColumnExpression or list of ColumnExpression, alias() can be used to
+                      return the output columns with an aliased name.
+                      For Example,
+                      >>> df.agg(df.first_name.count().alias("total_names"))
+                      Output column name after the above operation is:
+                      total_names
+
+                      >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
+                      Output column names after the above operation are:
+                      min_date, total_names
+
 
             RETURNS:
                 teradataml DataFrame object with operations
                 mentioned in parameter 'func' performed on specified
                 columns.
 
+
             RAISES:
                 TeradataMLException
                 1. TDMLDF_AGGREGATE_FAILED - If operations on given columns
@@ -5072,8 +5250,8 @@ class DataFrame():
                    valid datatype.
 
                    Possible error message:
-                   Invalid type(s) passed to argument 'func', should be:"\
-                   "['str', 'list', 'dict'].
+                   Invalid type(s) passed to argument 'func', should be:
+                   ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression'].
 
             EXAMPLES :
                 # Load the data to run the example.
@@ -5090,21 +5268,49 @@ class DataFrame():
            112      None   None  None    18/12/05
            >>>
 
-           # Dictionary of column names to string function/list of string functions as parameter.
+           # Get the minimum, sum and variance of employee number and minimum and mean of name,
+           # by passing dictionary of column names to string function/list of string functions as parameter.
            >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min', 'mean']})
-             min_employee_no  sum_employee_no  var_employee_no min_first_name
-           0             100              313        44.333333           abcd
+              min_employee_no  sum_employee_no  var_employee_no min_first_name
+           0              100              313        44.333333           abcd
 
-           # List of string functions as parameter.
+           # Get the minimum and sum of all the columns in the dataframe,
+           # by passing list of string functions as parameter.
            >>> df.agg(['min', 'sum'])
-             min_employee_no  sum_employee_no min_first_name min_marks sum_marks min_dob min_joined_date
-           0             100              313           abcd      None      None    None      1902-05-12
+              min_employee_no  sum_employee_no min_first_name min_marks sum_marks min_dob min_joined_date
+           0              100              313           abcd      None      None    None      1902-05-12
 
-           # A string function as parameter.
+           # Get the mean of all the columns in the dataframe, by passing string function as parameter.
            >>> df.agg('mean')
              mean_employee_no mean_marks mean_dob mean_joined_date
            0       104.333333       None     None         60/12/04
 
+           # Get the total names in the dataframe, by running count() on the "first_name"
+           # and passing ColumnExpression as parameter.
+           >>> df.agg(df.first_name.count())
+              count(first_name)
+           0                  2
+
+           # Get the minimum of employee number and total of names in the dataframe,
+           # by running min() on employee_no and count() on the "first_name"
+           # and passing list of ColumnExpression as parameter.
+           >>> df.agg([df.employee_no.min(), df.first_name.count()])
+              min(employee_no)  count(first_name)
+           0               100                  2
+
+           # Get the total names in the dataframe, by running count() on the "first_name" and
+           # use alias() to have the output column named as "total_names".
+           >>> df.agg(df.first_name.count().alias("total_names"))
+              total_names
+           0            2
+
+           # Get the minimum of joining date and total names in the dataframe,
+           # by running min() on joined_date and count() on the "first_name" and
+           # use alias() to have the output columns named as "min_date" and "total_names".
+           >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
+              min_date  total_names
+           0  02/12/05            2
+
            # Select only subset of columns from the DataFrame.
            >>> df1 = df.select(['employee_no', 'first_name', 'joined_date'])
 
@@ -5145,9 +5351,9 @@ class DataFrame():
            raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, "func"),
                                      MessageCodes.MISSING_ARGS)
 
-        if not isinstance(func, str) and not isinstance(func, list) and not isinstance(func, dict):
+        if not isinstance(func, (str, list, dict, ColumnExpression)):
            raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
-                                     'func', ['str', 'list', 'dict']),
+                                     'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
                                      MessageCodes.UNSUPPORTED_DATATYPE)
 
        return self._get_dataframe_aggregate(func)
@@ -5169,6 +5375,8 @@ class DataFrame():
                 3. Dictionary containing column name as key and
                    aggregate function name (string or list of
                    strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.
 
             **kwargs: Keyword arguments. Mainly used for Time Series Aggregates.
 
@@ -5345,7 +5553,9 @@ class DataFrame():
            result = self._check_numeric_overflow(agg_df)
        """
        try:
-           repr(result_df)
+           # Printing the DF will actually run the underlying select query and
+           # will bring up numeric overflow, if any. Only materializing won't work.
+           print(result_df)
            return False
        except TeradataMlException as tme:
            if "Numeric overflow occurred during computation" in str(tme):
@@ -5481,18 +5691,73 @@ class DataFrame():
         EXAMPLES:
             self.__get_data_columns()
         """
-        self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
-
-        query = repr(self._metaexpr) + ' FROM ' + self._table_name
+        if not self._table_name:
+            if not self._aed_query:
+                self.__generate_aed_query()
+            # TODO: Check the length of query and if it fails, create a view in catch block.
+            # Address in this JIRA: https://teradata-pe.atlassian.net/browse/ELE-6922
+            query = repr(self._metaexpr) + ' FROM ( ' + self._aed_query + ' ) as temp_table'
+        else:
+            query = repr(self._metaexpr) + ' FROM ' + self._table_name
 
         if self._orderby is not None:
             query += ' ORDER BY ' + self._orderby
 
+        query += ';'
         # Execute the query and get the results in a list.
         self.__data, self.__data_columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
 
         return self.__data, self.__data_columns
 
+    def __generate_aed_query(self, full_query=False):
+        """
+        DESCRIPTION:
+            Internal function to return underlying SQL for the teradataml
+            DataFrame. It is the same SQL that is used to view the data for
+            a teradataml DataFrame.
+
+        PARAMETERS:
+            full_query:
+                Optional Argument.
+                Specifies if the complete query for the dataframe should be returned.
+                When this parameter is set to True, query for the dataframe is returned
+                with respect to the base dataframe's table (from_table() or from_query())
+                or from the output tables of analytical functions (if there are any in the
+                workflow). This query may or may not be directly used to retrieve data
+                for the dataframe upon which the function is called.
+                When this parameter is not used, string returned is the query already used
+                or will be used to retrieve data for the teradataml DataFrame.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            String representing the underlying SQL query for the teradataml DataFrame.
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            self.__generate_aed_query()
+        """
+        # Run the AED call only when _aed_query is None or the type of the
+        # currently stored query (full/short) does not match the requested
+        # query type.
+        if (not self._aed_query) or (not self._is_full_query == full_query):
+            node_id = self._nodeid
+
+            if isinstance(self, (DataFrameGroupBy, DataFrameGroupByTime)):
+                # If dataframe is either of type groupby or groupbytime
+                # then get its parent dataframe nodeid and return queries
+                # for the same.
+                node_id = self._aed_utils._aed_get_parent_nodeids(self._nodeid)[0]
+
+            queries = self._aed_utils._aed_show_query(node_id, query_with_reference_to_top=full_query)
+            # Store query and type of query in class attributes to avoid future runs.
+            self._aed_query = queries[0][0]
+            self._is_full_query = full_query
+
+        return self._aed_query
+
     @collect_queryband(queryband="DF_select")
     def select(self, select_expression):
         """
@@ -7032,6 +7297,97 @@ class DataFrame():
         if function_name is None or function_name in VANTAGE_FUNCTION_ARGTYPE_DEPENDENT_MAPPER:
             self.__execute_node_and_set_table_name(self._nodeid)
         return True
+
+    def _assign_udf(self, udf_expr):
+        """
+        DESCRIPTION:
+            Internal function for DataFrame.assign() to execute the udf using
+            Script Table Operator and create new column for teradataml DataFrame.
+
+        PARAMETER:
+            udf_expr:
+                Required Argument.
+                Specifies a dictionary of column name to UDF expressions.
+                Types: dict
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            self._assign_udf(udf_expr)
+        """
+
+        df = self
+        env_name = None
+        # Create a dictionary of env_name to list of output columns to be run on that env.
+        env_mapper = OrderedDict()
+
+        exec_mode = 'REMOTE' if UtilFuncs._is_lake() else 'IN-DB'
+        if exec_mode == 'REMOTE':
+            if _InternalBuffer.get("auth_token") is None:
+                raise TeradataMlException(Messages.get_message(
+                    MessageCodes.FUNC_EXECUTION_FAILED, "'udf'", 'Authentication token is required to run udf. Set token using set_auth_token().'),
+                    MessageCodes.FUNC_EXECUTION_FAILED)
+            else:
+                for colname, col in udf_expr.items():
+                    env_name = UtilFuncs._get_env_name(col)
+                    # Store the env_name and its corresponding output column
+                    if env_name in env_mapper:
+                        env_mapper[env_name].append(colname)
+                    else:
+                        env_mapper[env_name] = [colname]
+        else:
+            env_mapper[env_name] = udf_expr.keys()
+
+        for env_name, cols in env_mapper.items():
+            # Create a dictionary of output columns to column type.
+            returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
+            # Store the udf functions
+            user_function = []
+            # Create a dictionary of output column name to udf name
+            columns_definitions = {}
+            # Create a dictionary of output column name to udf arguments
+            function_args = {}
+            for colname, col in udf_expr.items():
+                delimiter = col._delimiter
+                quotechar = col._quotechar
+                if colname in cols:
+                    user_function.append(col._udf)
+                    function_args[colname] = col._udf_args if col._udf_args else ()
+                    returns[colname] = col.type
+                    columns_definitions[colname] = col._udf.__name__
+
+            tbl_operators = _TableOperatorUtils([],
+                                                df,
+                                                "udf",
+                                                user_function,
+                                                exec_mode,
+                                                chunk_size=None,
+                                                returns=returns,
+                                                delimiter=delimiter,
+                                                quotechar=quotechar,
+                                                num_rows=1,
+                                                auth=None,
+                                                data_partition_column=None,
+                                                data_hash_column=None,
+                                                data_order_column=None,
+                                                is_local_order=None,
+                                                nulls_first=None,
+                                                sort_ascending=None,
+                                                charset=None,
+                                                env_name=env_name,
+                                                style="csv",
+                                                function_args=function_args,
+                                                columns_definitions=columns_definitions,
+                                                output_type_converters={
+                                                    col_name: _Dtypes._teradata_type_to_python_type(col_type)
+                                                    for col_name, col_type in returns.items()})
+
+            df = tbl_operators.execute()
+        return df
 
     @collect_queryband(queryband="DF_assign")
     def assign(self, drop_columns=False, **kwargs):
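
On Lake systems (exec_mode == 'REMOTE'), _assign_udf() buckets the requested output columns by the remote environment each UDF must run in, so a single Apply/Script invocation per environment serves all of its columns. The grouping step, isolated as a sketch (get_env stands in for UtilFuncs._get_env_name):

    from collections import OrderedDict

    def group_columns_by_env(udf_expr, get_env):
        env_mapper = OrderedDict()
        for colname, col in udf_expr.items():
            # setdefault keeps insertion order and appends to an existing bucket
            env_mapper.setdefault(get_env(col), []).append(colname)
        return env_mapper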
@@ -7043,10 +7399,12 @@ class DataFrame():
             drop_columns:
                 Optional Argument.
                 If True, drop columns that are not specified in assign.
-                Note:
-                    When DataFrame.assign() is run on DataFrame.groupby(), this argument
-                    is ignored. In such cases, all columns are dropped and only new columns
-                    and grouping columns are returned.
+                Notes:
+                    1. When DataFrame.assign() is run on DataFrame.groupby(), this argument
+                       is ignored. In such cases, all columns are dropped and only new columns
+                       and grouping columns are returned.
+                    2. Argument is ignored for UDF functions.
+
                 Default Value: False
                 Types: bool
 
@@ -7062,6 +7420,7 @@ class DataFrame():
                 * SQLAlchemy ClauseElements.
                   (See teradataml extension with SQLAlchemy in teradataml User Guide
                   and Function reference guide for more details)
+                * Function - udf.
 
 
             RETURNS:
@@ -7087,6 +7446,16 @@ class DataFrame():
                used, but the column used in such function must be a part of group by columns.
                See examples for teradataml extension with SQLAlchemy on using various
                functions with DataFrame.assign().
+            6. UDF expressions can run both on Vantage Cloud Lake, leveraging the Apply Table
+               Operator of the Open Analytics Framework, and on Enterprise, leveraging
+               Vantage's Script Table Operator.
+            7. One can pass both regular expressions and udf expressions to this API.
+               However, regular expressions are computed first, followed by udf expressions.
+               Hence the order of columns is also maintained in the same order.
+               Look at Example 18 to understand more.
+            8. While passing multiple udf expressions, one cannot pass one column's output
+               as another column's input in the same ``assign`` call.
+            9. If the user passes multiple udf expressions, the delimiter and quotechar
+               specified in the last udf expression are considered for processing.
 
             RAISES:
                 1. ValueError - When a callable is passed as a value, or columns from different
@@ -7348,6 +7717,134 @@ class DataFrame():
             1  Advanced  2.886226  3.508750  84.21
             2    Novice  6.377775  3.559091  39.15
             >>>
+
+            #
+            # Executing user defined function (UDF) with assign()
+            #
+            # Example 15: Create two user defined functions, 'to_upper' and 'sum':
+            #             'to_upper' to convert the values in 'accounts' to upper case and
+            #             'sum' to add the length of the string values in column 'accounts'
+            #             to column 'Feb' and store the result in an Integer type column.
+            >>> @udf
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            >>> from teradatasqlalchemy.types import INTEGER
+            >>> @udf(returns=INTEGER())
+            ... def sum(x, y):
+            ...     return len(x)+y
+            >>>
+            # Assign both Column Expressions returned by user defined functions
+            # to the DataFrame.
+            >>> res = df.assign(upper_stats = to_upper('accounts'), len_sum = sum('accounts', 'Feb'))
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime upper_stats  len_sum
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC       98
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC      207
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC      100
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC      209
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC      220
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO      218
+            >>>
+
+            # Example 16: Create a user defined function to add 4 to the 'datetime' column
+            #             and store the result in a DATE type column.
+            >>> from teradatasqlalchemy.types import DATE
+            >>> import datetime
+            >>> @udf(returns=DATE())
+            ... def add_date(x, y):
+            ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y)).strftime("%y/%m/%d")
+            >>>
+            # Assign the Column Expression returned by user defined function
+            # to the DataFrame.
+            >>> res = df.assign(new_date = add_date('datetime', 4))
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime  new_date
+            accounts
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08
+            >>>
+
+            # Example 17: Create a user defined function 'to_upper' to convert
+            #             the values in 'accounts' to upper case and create a
+            #             new column with a string literal value.
+            >>> @udf
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            # Assign both expressions to the DataFrame.
+            >>> res = df.assign(upper_stats = to_upper('accounts'), new_col = 'string')
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime new_col upper_stats
+            accounts
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04  string    ALPHA CO
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04  string    BLUE INC
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  string  YELLOW INC
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04  string   JONES LLC
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04  string     RED INC
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  string  ORANGE INC
+            >>>
+
+            # Example 18: Create two user defined functions, 'to_upper' and 'sum',
+            #             and create new columns with a string literal value and an
+            #             arithmetic operation on column 'Feb'.
+            >>> @udf
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            >>> from teradatasqlalchemy.types import INTEGER
+            >>> @udf(returns=INTEGER())
+            ... def sum(x, y):
+            ...     return len(x)+y
+            >>>
+            # Assign all expressions to the DataFrame.
+            >>> res = df.assign(upper_stats = to_upper('accounts'), new_col = 'abc',
+            ...                 len_sum = sum('accounts', 'Feb'), col_sum = df.Feb+1)
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime  col_sum new_col upper_stats  len_sum
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04     91.0     abc    BLUE INC       98
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04    211.0     abc    ALPHA CO      218
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04    201.0     abc   JONES LLC      209
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04     91.0     abc  YELLOW INC      100
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04    211.0     abc  ORANGE INC      220
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04    201.0     abc     RED INC      207
+            >>>
+
+            # Example 19: Convert the values in the 'accounts' column to upper case using a user
+            #             defined function on Vantage Cloud Lake.
+            # Create a Python 3.10.5 environment with given name and description in Vantage.
+            >>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
+            User environment 'test_udf' created.
+            >>>
+            # Create a user defined function 'to_upper' to get the values in upper case
+            # and pass the user env to run it on.
+            >>> from teradataml.dataframe.functions import udf
+            >>> @udf(env_name = env)
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            # Assign the Column Expression returned by user defined function
+            # to the DataFrame.
+            >>> df.assign(upper_stats = to_upper('accounts'))
+                          Feb    Jan    Mar    Apr  datetime upper_stats
+            accounts
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC
+            >>>
         """
         # Argument validations
         awu_matrix = []
@@ -7393,13 +7890,35 @@ class DataFrame():
             msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
             raise TeradataMlException(msg, MessageCodes.TDMLDF_INFO_ERROR)
 
-        try:
-            (new_meta, new_nodeid) = self._generate_assign_metaexpr_aed_nodeid(drop_columns, **kwargs)
-            return self._create_dataframe_from_node(new_nodeid, new_meta, self._index_label)
-        except Exception as err:
-            errcode = MessageCodes.TDMLDF_INFO_ERROR
-            msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
-            raise TeradataMlException(msg, errcode) from err
+        # Create a dictionary of column name to udf expressions and
+        # column name to normal/regular expressions.
+        udf_expr = {}
+        regular_expr = {}
+        for colname, col in kwargs.items():
+            # If value passed in kwargs is a ColumnExpression and is a udf, store it.
+            if isinstance(col, ColumnExpression) and col._udf:
+                udf_expr[colname] = col
+            else:
+                regular_expr[colname] = col
+        df = self
+
+        # If kwargs contains both regular and udf expressions, first create new columns
+        # from normal/regular expressions then on the output dataframe create new columns
+        # from udf expression.
+        if bool(regular_expr):
+            try:
+                (new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(drop_columns, **regular_expr)
+                df = df._create_dataframe_from_node(new_nodeid, new_meta, df._index_label)
+            except Exception as err:
+                errcode = MessageCodes.TDMLDF_INFO_ERROR
+                msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
+                raise TeradataMlException(msg, errcode) from err
+
+        if bool(udf_expr):
+            df = df._assign_udf(udf_expr)
+
+        return df
+
 
     @collect_queryband(queryband="DF_get")
     def get(self, key):
@@ -10013,9 +10532,10 @@ class DataFrame():
         case_when_then = {}
         list_of_fracs = frac
 
-        # When stratify column is passed for sample then perform TrainTestSplit
-        # for data sampling.
-        if stratify_column is not None:
+        # When stratify column is passed for sample, or when seed is passed
+        # for reproducibility of results, then
+        # perform TrainTestSplit for data sampling.
+        if stratify_column is not None or seed is not None:
             # Local import TrainTestSplit function.
             from teradataml.analytics.sqle import TrainTestSplit
 
@@ -10029,7 +10549,16 @@ class DataFrame():
                                                train_size=list_of_fracs[0],
                                                test_size=list_of_fracs[1],
                                                stratify_column=stratify_column,
-                                               seed=seed)
+                                               seed=seed,
+                                               persist=True,
+                                               display_table_name=False)
+
+            # Retrieve the table name from TrainTestSplit_out object.
+            table_name = TrainTestSplit_out.result._table_name
+
+            # Add the table to garbage collector.
+            table_added = GarbageCollector._add_to_garbagecollector(table_name)
+
             # Retrieve the sampled result and update the column name and values
             # for backward compatibility.
             _sampled_df = TrainTestSplit_out.result
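
User-visible effect of the two hunks above: passing a seed alone, without a stratify column, is now enough to route sample() through TD_TrainTestSplit, and the persisted result table is registered for session-end cleanup. A hedged usage sketch:

    from teradataml import DataFrame

    df = DataFrame("admissions_train")
    # With seed set, both calls select the same rows; the backing table that
    # TrainTestSplit persists is dropped by the GarbageCollector at session end.
    s1 = df.sample(frac=[0.8, 0.2], seed=42)
    s2 = df.sample(frac=[0.8, 0.2], seed=42)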
@@ -10133,10 +10662,10 @@ class DataFrame():
 
         # Make this non-lazy. Added this in order to fix https://teradata-pe.atlassian.net/browse/ELE-6368
         # Cannot use __execute_node_and_set_table_name because self points to original df.
-        # Hence, setting the __table_name with _execute_node_return_db_object_name.
+        # Hence, setting the _table_name with _execute_node_return_db_object_name.
 
         df = self._create_dataframe_from_node(sample_node_id, new_metaexpr, self._index_label)
-        df.__table_name = df_utils._execute_node_return_db_object_name(sample_node_id, new_metaexpr)
+        df._table_name = df_utils._execute_node_return_db_object_name(sample_node_id, new_metaexpr)
 
         return df
 
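The __table_name to _table_name rename above fixes a Python name-mangling pitfall: inside the class body, df.__table_name = ... compiles to df._DataFrame__table_name = ..., a mangled attribute that the rest of the code, which reads _table_name, never sees. A two-line illustration:

    class Demo:
        def tag(self):
            self.__name = "mangled"  # actually sets self._Demo__name
            self._name = "plain"     # the attribute other code reads

    d = Demo()
    d.tag()
    print(sorted(vars(d)))  # ['_Demo__name', '_name']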
@@ -10267,26 +10796,14 @@ class DataFrame():
                     where admitted > 0) as temp_table SAMPLE 0.9'
 
         """
+        # Argument validations
+        awu_matrix = []
+        awu_matrix.append(["full_query", full_query, False, (bool)])
+        # Validate argument types
+        _Validators._validate_function_arguments(awu_matrix)
 
         try:
-            # Argument validations
-            awu_matrix = []
-            awu_matrix.append(["full_query", full_query, False, (bool)])
-            # Validate argument types
-            _Validators._validate_function_arguments(awu_matrix)
-
-            node_id = self._nodeid
-
-            if isinstance(self, (DataFrameGroupBy, DataFrameGroupByTime)):
-                # If dataframe is either of type groupby or groupbytime
-                # then get it's parent dataframe nodeid and return queries
-                # for the same
-                node_id = self._aed_utils._aed_get_parent_nodeids(self._nodeid)[0]
-
-            queries = self._aed_utils._aed_show_query(node_id, query_with_reference_to_top=full_query)
-
-            return queries[0][0]
-
+            return self.__generate_aed_query(full_query)
         except TeradataMlException:
             raise
 
@@ -10296,7 +10813,7 @@ class DataFrame():
         except Exception as err:
             errcode = MessageCodes.TDMLDF_INFO_ERROR
             msg = Messages.get_message(errcode)
-            raise TeradataMlException(msg, errcode) from err
+            raise TeradataMlException(msg, errcode) from err
 
     @collect_queryband(queryband="DF_mapRow")
     def map_row(self,
@@ -13755,7 +14272,7 @@ class DataFrame():
                 Types: int OR NoneType
 
         RETURNS:
-            iterator, an object to iterate over namedtuples for each row in the DataFrame.
+            iterator, an object to iterate over rows in the DataFrame.
 
         RAISES:
             None
@@ -13804,9 +14321,10 @@ class DataFrame():
         cur = execute_sql(query)
 
         if name:
+            columns = [column[0] for column in cur.description]
             for rec in cur:
-                Row = namedtuple(name, [column[0] for column in cur.description])
-                yield Row(*rec)
+                row = _Row(columns=columns, values=rec)
+                yield row
         else:
             for rec in cur:
                 yield rec
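
itertuples() previously built a brand-new namedtuple class for every fetched row; the new code computes the column list once per cursor and constructs a lightweight _Row per record (the class added in the new teradataml/dataframe/row.py, +160 lines per the file list). A hypothetical minimal stand-in for _Row, showing only the columns/values pairing idea rather than the shipped class:

    class _Row:
        def __init__(self, columns, values):
            # attribute-style access: row.col_name
            self.__dict__.update(zip(columns, values))

        def __repr__(self):
            return "Row({})".format(
                ", ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()))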
@@ -16626,7 +17144,7 @@ class _TDUAF(DataFrame):
         # UAF Functions do not accept double quotes.
         db_name = UtilFuncs._extract_db_name(table_name)
         if db_name:
-            table_name = "{}.{}".format(db_name, UtilFuncs._extract_table_name(table_name))
+            table_name = '"{}"."{}"'.format(db_name, UtilFuncs._extract_table_name(table_name))
         else:
             table_name = UtilFuncs._extract_table_name(table_name)
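
The quoting fix in the final hunk wraps the database and table names in double quotes individually, so names containing dots, spaces, or mixed case remain unambiguous identifiers in the SQL the UAF layer generates. Illustration of the difference (plain Python string formatting):

    db_name, tbl = "my db", "Sales.2024"
    old = "{}.{}".format(db_name, tbl)      # my db.Sales.2024   (ambiguous)
    new = '"{}"."{}"'.format(db_name, tbl)  # "my db"."Sales.2024"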