teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (240)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -19,6 +19,7 @@ import pandas as pd
 import re
 import sqlalchemy
 import sys
+import urllib.parse
 import teradataml.context.context as tdmlctx
 
 from collections import OrderedDict, namedtuple
@@ -42,7 +43,9 @@ from teradataml.dataframe.indexer import _LocationIndexer
 from teradataml.common.aed_utils import AedUtils
 from teradataml.options.display import display
 from teradataml.dataframe.copy_to import copy_to_sql
+from teradataml.dataframe.row import _Row
 from teradataml.dataframe.setop import concat
+from teradataml.dbutils.dbutils import list_td_reserved_keywords
 from teradataml.plot.plot import _Plot
 from teradataml.scriptmgmt.UserEnv import UserEnv
 from teradataml.utils.dtypes import _Dtypes, _ListOf, _TupleOf
@@ -53,7 +56,10 @@ from teradatasql import OperationalError
 from teradataml.dataframe.window import Window
 from teradataml.dataframe.data_transfer import _DataTransferUtils
 from teradataml.common.bulk_exposed_utils import _validate_unimplemented_function
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
+from teradataml.options.configure import configure
+from teradataml.utils.internal_buffer import _InternalBuffer
+from teradataml.common.constants import OutputStyle
 
 # TODO use logger when available on master branch
 # logger = teradatapylog.getLogger()
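
The relocated decorator now ships inside the package itself (see the new teradataml/telemetry_utils/queryband.py, +52 lines, in the file list above) instead of being imported from teradatasqlalchemy. Its internals are not shown in this diff; the following is a hypothetical, minimal sketch of a decorator factory with the same call shape, purely for orientation — the real module may differ:

import functools

def collect_queryband(queryband=None):
    # Hypothetical sketch: tag a call with a query-band label.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # A real implementation would push the label into the session
            # query band before executing; here we only record it.
            wrapper.last_queryband = queryband or func.__name__
            return func(*args, **kwargs)
        return wrapper
    return decorator

@collect_queryband(queryband="DF_fromTable")
def from_table_demo():
    return "ok"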
@@ -151,6 +157,11 @@ class DataFrame():
         # This attribute added to add setter for columns property,
         # it is required when setting columns from groupby
         self._columns = None
+        # This attribute stores the internal AED query and avoids multiple
+        # calls to the AED utility function aed_show_query().
+        self._aed_query = None
+        # This attribute stores the type of query stored in self._aed_query.
+        self._is_full_query = None
 
         # Property to determine if table is an ART table or not.
         self._is_art = None
@@ -221,7 +232,7 @@ class DataFrame():
 
             self._nodeid = self._aed_utils._aed_query(self._query, temp_table_name)
         else:
-            if inspect.stack()[1][3] not in ['_from_node', '__init__']:
+            if inspect.stack()[1][3] not in ['_from_node', '__init__', 'alias']:
                 raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
                                           MessageCodes.TDMLDF_CREATE_FAIL)
 
@@ -233,6 +244,7 @@ class DataFrame():
             self._iloc = _LocationIndexer(self, integer_indexing=True)
             self.__data = None
             self.__data_columns = None
+            self._alias = None
 
         except TeradataMlException:
             raise
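
The widened inspect.stack() whitelist above ('alias' added alongside '_from_node' and '__init__') is what lets the new alias() method construct a DataFrame that has neither a table name nor a query yet. A standalone sketch of the caller-name-check pattern, with illustrative names rather than the package's code:

import inspect

def _guarded_init():
    # Mirror of the pattern: allow construction only from known internal callers.
    caller = inspect.stack()[1][3]
    if caller not in ('_from_node', '__init__', 'alias'):
        raise RuntimeError("unsupported caller: " + caller)
    return caller

def alias():
    return _guarded_init()

print(alias())  # prints 'alias' -- a whitelisted caller passes the check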
@@ -242,6 +254,100 @@ class DataFrame():
             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
                                       MessageCodes.TDMLDF_CREATE_FAIL) from err
 
+    @property
+    def db_object_name(self):
+        """
+        DESCRIPTION:
+            Get the underlying database object name, on which DataFrame is
+            created.
+
+        RETURNS:
+            str representing object name of DataFrame.
+
+        EXAMPLES:
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame('sales')
+            >>> df.db_object_name
+            '"sales"'
+        """
+        if self._table_name is not None:
+            return self._table_name
+        else:
+            msg = "Object name is available once DataFrame is materialized. " \
+                  "Use DataFrame.materialize() to materialize DataFrame."
+            print(msg)
+
+    def alias(self, alias_name):
+        """
+        DESCRIPTION:
+            Method to create an aliased teradataml DataFrame.
+            Note:
+                * This method is recommended to be used before performing
+                  self join using DataFrame's join() API.
+
+        PARAMETERS:
+            alias_name:
+                Required Argument.
+                Specifies the alias name to be assigned to a teradataml DataFrame.
+                Types: str
+
+        RETURNS:
+            teradataml DataFrame
+
+        EXAMPLES:
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            13      no  4.00  Advanced      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            5       no  3.44    Novice      Novice         0
+            19     yes  1.98  Advanced    Advanced         0
+            15     yes  4.00  Advanced    Advanced         1
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            36      no  3.00  Advanced      Novice         0
+            38     yes  2.65  Advanced    Beginner         1
+
+            # Example 1: Create an alias of teradataml DataFrame.
+            >>> df2 = df.alias("adm_trn")
+
+            # Print aliased DataFrame.
+            >>> df2
+               masters   gpa     stats programming  admitted
+            id
+            13      no  4.00  Advanced      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            5       no  3.44    Novice      Novice         0
+            19     yes  1.98  Advanced    Advanced         0
+            15     yes  4.00  Advanced    Advanced         1
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            36      no  3.00  Advanced      Novice         0
+            38     yes  2.65  Advanced    Beginner         1
+        """
+        arg_info_matrix = [["alias_name", alias_name, False, (str), True]]
+        _Validators._validate_function_arguments(arg_info_matrix)
+        try:
+            alias_df = self._from_node(self._nodeid, self._metaexpr, self._index_label,
+                                       reuse_metaexpr=False)
+            # Assigning self attributes to newly created alias dataframe.
+            alias_df._table_name = self._table_name
+            alias_df._index = self._index
+            alias_df._index_label = self._index_label
+            setattr(alias_df._metaexpr.t, "table_alias", alias_name)
+            alias_df._alias = alias_name
+            return alias_df
+        except Exception as err:
+            error_code = MessageCodes.EXECUTION_FAILED
+            error_msg = Messages.get_message(
+                error_code, "create alias dataFrame", '{}'.format(str(err)))
+            raise TeradataMlException(error_msg, error_code)
+
     @classmethod
     @collect_queryband(queryband="DF_fromTable")
     def from_table(cls, table_name, index=True, index_label=None):
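
A condensed usage sketch of the two additions above. It assumes a reachable Vantage system and valid credentials (placeholders below), and otherwise follows the docstring examples:

from teradataml import create_context, DataFrame, load_example_data

create_context(host="<host>", username="<user>", password="<password>")
load_example_data("dataframe", "admissions_train")

df = DataFrame("admissions_train")
rhs = df.alias("adm_rhs")     # independent metaexpression over the same data
print(df.db_object_name)      # table-backed, so the name is available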
@@ -356,7 +462,7 @@ class DataFrame():
         return cls(index=index, index_label=index_label, query=query, materialize=materialize)
 
     @classmethod
-    def _from_node(cls, nodeid, metaexpr, index_label=None, undropped_index=None):
+    def _from_node(cls, nodeid, metaexpr, index_label=None, undropped_index=None, reuse_metaexpr=True):
         """
         Private class method for creating a DataFrame from a nodeid and parent metadata.
 
@@ -377,6 +483,12 @@ class DataFrame():
                 Optional Argument.
                 List specifying index column(s) to be retained as columns for printing.
 
+            reuse_metaexpr:
+                Optional Argument.
+                Specifies the flag to decide whether to reuse the same _MetaExpression object or not.
+                Default Value: True
+                Types: bool
+
         EXAMPLES:
             from teradataml.dataframe.dataframe import DataFrame
             df = DataFrame._from_node(1234, metaexpr)
@@ -392,32 +504,171 @@ class DataFrame():
         df = cls()
         df._nodeid = nodeid
         df._source_type = SourceType.TABLE.value
-        df._get_metadata_from_metaexpr(metaexpr)
+
+        if not reuse_metaexpr:
+            # Create a new _MetaExpression object for the newly created
+            # DataFrame using the reference metaexpression.
+            df._metaexpr = UtilFuncs._get_metaexpr_using_parent_metaexpr(nodeid, metaexpr)
+            # When the metaexpression is created using only column information from the
+            # parent DataFrame, the underlying SQLAlchemy table is created with '' as the
+            # table name. Assign the name from the reference metaexpression here.
+            df._metaexpr.t.name = metaexpr.t.name
+            # Populate corresponding information into the newly created DataFrame
+            # object using the newly created metaexpression.
+            df._get_metadata_from_metaexpr(df._metaexpr)
+        else:
+            # Populate corresponding information into the newly created DataFrame
+            # object using the reference metaexpression.
+            df._get_metadata_from_metaexpr(metaexpr)
 
         if isinstance(index_label, str):
             index_label = [index_label]
 
-        if index_label is not None and all(elem in [col.name for col in metaexpr.c] for elem in index_label):
+        if index_label is not None and all(elem in [col.name for col in df._metaexpr.c] for elem in index_label):
             df._index_label = index_label
         elif index_label is not None and all(UtilFuncs._teradata_quote_arg(elem, "\"", False)
-                                             in [col.name for col in metaexpr.c] for elem in index_label):
+                                             in [col.name for col in df._metaexpr.c] for elem in index_label):
             df._index_label = index_label
 
         # Set the flag suggesting that the _index_label is set,
-        # and that a database lookup wont be required even when it is None.
+        # and that a database lookup won't be required even when it is None.
         df._index_query_required = False
 
         if isinstance(undropped_index, str):
             undropped_index = [undropped_index]
 
-        if undropped_index is not None and all(elem in [col.name for col in metaexpr.c] for elem in undropped_index):
+        if undropped_index is not None and all(elem in [col.name for col in df._metaexpr.c] for elem in undropped_index):
             df._undropped_index = undropped_index
         elif undropped_index is not None and all(UtilFuncs._teradata_quote_arg(elem, "\"", False)
-                                                 in [col.name for col in metaexpr.c] for elem in undropped_index):
+                                                 in [col.name for col in df._metaexpr.c] for elem in undropped_index):
             df._undropped_index = undropped_index
 
         return df
 
+    def create_temp_view(self, name):
+        """
+        DESCRIPTION:
+            Creates a temporary view for the session on the DataFrame.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the name of the temporary view.
+                Types: str
+
+        RETURNS:
+            None
+
+        RAISES:
+            OperationalError (when the view already exists).
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            38     yes  2.65  Advanced    Beginner         1
+            7      yes  2.33    Novice      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            32     yes  3.46  Advanced    Beginner         0
+            11      no  3.13  Advanced    Advanced         1
+            15     yes  4.00  Advanced    Advanced         1
+            36      no  3.00  Advanced      Novice         0
+
+            # Example 1: Create view 'new_admissions'.
+            >>> df.create_temp_view("new_admissions")
+            >>> new_df = DataFrame("new_admissions")
+            >>> new_df
+               masters   gpa     stats programming  admitted
+            id
+            38     yes  2.65  Advanced    Beginner         1
+            7      yes  2.33    Novice      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            32     yes  3.46  Advanced    Beginner         0
+            11      no  3.13  Advanced    Advanced         1
+            15     yes  4.00  Advanced    Advanced         1
+            36      no  3.00  Advanced      Novice         0
+        """
+        # Validating Arguments
+        arg_type_matrix = []
+        arg_type_matrix.append(["name", name, False, (str), True])
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        GarbageCollector._add_to_garbagecollector(name, TeradataConstants.TERADATA_VIEW)
+        UtilFuncs._create_view(name, self.show_query())
+
+    def materialize(self):
+        """
+        DESCRIPTION:
+            Method to materialize a teradataml DataFrame into a database object.
+            Notes:
+                * DataFrames are materialized into either a view, a table, or a
+                  volatile table; teradataml decides which and takes care of it.
+                * If user wants to materialize the object into a specific database
+                  object such as a table or volatile table, use the 'to_sql()',
+                  'copy_to_sql()' or 'fastload()' functions.
+                * The materialized object is garbage collected at the end of the session.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            DataFrame
+
+        EXAMPLES:
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            13      no  4.00  Advanced      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            5       no  3.44    Novice      Novice         0
+            19     yes  1.98  Advanced    Advanced         0
+            15     yes  4.00  Advanced    Advanced         1
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            36      no  3.00  Advanced      Novice         0
+            38     yes  2.65  Advanced    Beginner         1
+
+            # Example 1: Perform operations on teradataml DataFrame
+            #            and materialize it in a database object.
+            >>> df2 = df.get([["id", "masters", "gpa"]])
+
+            # Initially table_name will be None.
+            >>> df2._table_name
+
+            >>> df2.materialize()
+               masters   gpa
+            id
+            15     yes  4.00
+            7      yes  2.33
+            22     yes  3.46
+            17      no  3.83
+            13      no  4.00
+            38     yes  2.65
+            26     yes  3.57
+            5       no  3.44
+            34     yes  3.85
+            40     yes  3.95
+
+            # After materialize(), view name will be assigned.
+            >>> df2._table_name
+            '"ALICE"."ml__select__172077355985236"'
+            >>>
+        """
+        self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
+        return self
+
     @collect_queryband(queryband="DF_fillna")
     def fillna(self, value=None, columns=None, literal_value=False):
         """
@@ -657,7 +908,10 @@ class DataFrame():
         Private method for setting _metaexpr and retrieving column names and types.
 
         PARAMETERS:
-            metaexpr - Parent meta data (_MetaExpression object).
+            metaexpr:
+                Required Argument.
+                Specifies parent meta data (_MetaExpression object).
+                Types: _MetaExpression
 
         RETURNS:
             None
@@ -670,7 +924,8 @@ class DataFrame():
         self._column_names_and_types = []
         self._td_column_names_and_types = []
         self._td_column_names_and_sqlalchemy_types = {}
-        for col in metaexpr.c:
+
+        for col in self._metaexpr.c:
             if isinstance(col.type, sqlalchemy.sql.sqltypes.NullType):
                 tdtype = TeradataTypes.TD_NULL_TYPE.value
             else:
@@ -1934,7 +2189,7 @@ class DataFrame():
         else:
             col_filters = col_names
 
-        col_filters_decode = ["decode(\"{}\", null, 0, 1)".format(col_name) for col_name in col_filters]
+        col_filters_decode = ["CASE WHEN \"{}\" IS NULL THEN 0 ELSE 1 END".format(col_name) for col_name in col_filters]
         fmt_filter = " + ".join(col_filters_decode)
 
         if thresh is not None:
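
The dropna threshold filter above swaps Teradata's DECODE shorthand for an equivalent ANSI CASE expression; summed, either form counts non-NULL values per row. A standalone sketch of the generated strings on both sides of the change (column names are illustrative):

cols = ["gpa", "stats"]

old = ["decode(\"{}\", null, 0, 1)".format(c) for c in cols]
new = ["CASE WHEN \"{}\" IS NULL THEN 0 ELSE 1 END".format(c) for c in cols]

print(" + ".join(new))
# CASE WHEN "gpa" IS NULL THEN 0 ELSE 1 END + CASE WHEN "stats" IS NULL THEN 0 ELSE 1 END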
@@ -5421,6 +5676,8 @@ class DataFrame():
             result = self._check_numeric_overflow(agg_df)
         """
         try:
+            # Printing the DF actually runs the underlying SELECT query and
+            # surfaces a numeric overflow, if any. Materializing alone won't.
             repr(result_df)
             return False
         except TeradataMlException as tme:
@@ -5557,18 +5814,73 @@ class DataFrame():
         EXAMPLES:
             self.__get_data_columns()
         """
-        self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
-
-        query = repr(self._metaexpr) + ' FROM ' + self._table_name
+        if not self._table_name:
+            if not self._aed_query:
+                self.__generate_aed_query()
+            # TODO: Check the length of the query and, if it fails, create a view in the catch block.
+            #       Address in this JIRA: https://teradata-pe.atlassian.net/browse/ELE-6922
+            query = repr(self._metaexpr) + ' FROM ( ' + self._aed_query + ' ) as temp_table'
+        else:
+            query = repr(self._metaexpr) + ' FROM ' + self._table_name
 
         if self._orderby is not None:
             query += ' ORDER BY ' + self._orderby
 
+        query += ';'
         # Execute the query and get the results in a list.
         self.__data, self.__data_columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
 
         return self.__data, self.__data_columns
 
+    def __generate_aed_query(self, full_query=False):
+        """
+        DESCRIPTION:
+            Internal function to return the underlying SQL for the teradataml
+            DataFrame. It is the same SQL that is used to view the data for
+            a teradataml DataFrame.
+
+        PARAMETERS:
+            full_query:
+                Optional Argument.
+                Specifies whether the complete query for the dataframe should be returned.
+                When this parameter is set to True, the query for the dataframe is returned
+                with respect to the base dataframe's table (from_table() or from_query())
+                or the output tables of analytic functions (if there are any in the
+                workflow). This query may or may not be directly usable to retrieve data
+                for the dataframe upon which the function is called.
+                When this parameter is not used, the string returned is the query already
+                used, or to be used, to retrieve data for the teradataml DataFrame.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            String representing the underlying SQL query for the teradataml DataFrame.
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            self.__generate_aed_query()
+        """
+        # Run the AED call only when _aed_query is None or the flavor of the
+        # currently stored query (full/short) does not match the requested one.
+        if (not self._aed_query) or (not self._is_full_query == full_query):
+            node_id = self._nodeid
+
+            if isinstance(self, (DataFrameGroupBy, DataFrameGroupByTime)):
+                # If the dataframe is of type groupby or groupbytime,
+                # get its parent dataframe's nodeid and return the queries
+                # for that node.
+                node_id = self._aed_utils._aed_get_parent_nodeids(self._nodeid)[0]
+
+            queries = self._aed_utils._aed_show_query(node_id, query_with_reference_to_top=full_query)
+            # Store the query and its flavor in instance attributes to avoid future runs.
+            self._aed_query = queries[0][0]
+            self._is_full_query = full_query
+
+        return self._aed_query
+
     @collect_queryband(queryband="DF_select")
     def select(self, select_expression):
         """
@@ -5830,6 +6142,8 @@ class DataFrame():
                 * "open_sessions" specifies the number of Teradata data transfer
                   sessions to be opened for fastexport. This argument is only applicable
                   in fastexport mode.
+                * The function returns the pandas dataframe with Decimal column types as
+                  float instead of object. If user wants the datatype to be object, set
+                  argument "coerce_float" to False.
 
             Notes:
                 1. For additional information about "coerce_float" and
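
A short sketch of the behavior described in the added note, continuing the connected-session assumption from the earlier sketches:

pdf_default = df.to_pandas()                   # Decimal columns arrive as float64
pdf_exact = df.to_pandas(coerce_float=False)   # Decimal columns stay as objects
print(pdf_default.dtypes, pdf_exact.dtypes, sep="\n")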
@@ -6145,15 +6459,22 @@ class DataFrame():
             Supported join operators are =, ==, <, <=, >, >=, <> and != (= and <> operators are
             not supported when using DataFrame columns as operands).
 
-            Note:
-                1. When multiple join conditions are given, they are joined using AND boolean
-                   operator. Other boolean operators are not supported.
-                2. Nesting of join on conditions in column expressions using & and | is not
-                   supported. The example for unsupported nested join on conditions is:
-                   on = [(df1.a == df1.b) & (df1.c == df1.d)]
+            Notes:
+                1. When multiple join conditions are given as a list of strings/ColumnExpressions,
+                   they are joined using the AND operator.
+                2. Two or more on conditions can be combined using the & and | operators
+                   and passed as a single ColumnExpression.
+                   You can use (df1.a == df1.b) & (df1.c == df1.d) in place of
+                   [df1.a == df1.b, df1.c == df1.d].
+                3. Two or more on conditions cannot be combined using the pythonic 'and'
+                   and 'or'.
+                   You can use (df1.a == df1.b) & (df1.c == df1.d) in place of
+                   [df1.a == df1.b and df1.c == df1.d].
+                4. Performing a self join using the same DataFrame object in the 'other'
+                   argument is not supported. In order to perform a self join,
+                   first create an aliased DataFrame using the alias() API and pass it
+                   for the 'other' argument. Refer to Example 10 in the EXAMPLES section.
 
-                   You can use [df1.a == df1.b, df1.c == df1.d] in place of
-                   [(df1.a == df1.b) & (df1.c == df1.d)].
 
         PARAMETERS:
 
@@ -6181,15 +6502,20 @@ class DataFrame():
                   is the column of left dataframe df1 and col2 is the column of right
                   dataframe df2.
                   Examples:
-                    1. [df1.a == df2.a, df1.b == df2.b] indicates df1.a = df2.a and df1.b = df2.b.
-                    2. [df1.a == df2.b, df1.c == df2.d] indicates df1.a = df2.b and df1.c = df2.d.
-                    3. [df1.a <= df2.b and df1.c > df2.d] indicates df1.a <= df2.b and df1.c > df2.d.
-                    4. [df1.a < df2.b and df1.c >= df2.d] indicates df1.a < df2.b and df1.c >= df2.d.
+                    1. [df1.a == df2.a, df1.b == df2.b] indicates df1.a = df2.a AND df1.b = df2.b.
+                    2. [df1.a == df2.b, df1.c == df2.d] indicates df1.a = df2.b AND df1.c = df2.d.
+                    3. [df1.a <= df2.b & df1.c > df2.d] indicates df1.a <= df2.b AND df1.c > df2.d.
+                    4. [df1.a < df2.b | df1.c >= df2.d] indicates df1.a < df2.b OR df1.c >= df2.d.
                     5. df1.a != df2.b indicates df1.a != df2.b.
                 • The combination of both string comparisons and comparisons as column expressions.
                   Examples:
-                    1. ["a", df1.b == df2.b] indicates df1.a = df2.a and df1.b = df2.b.
-                    2. [df1.a <= df2.b, "c > d"] indicates df1.a <= df2.b and df1.c > df2.d.
+                    1. ["a", df1.b == df2.b] indicates df1.a = df2.a AND df1.b = df2.b.
+                    2. [df1.a <= df2.b, "c > d"] indicates df1.a <= df2.b AND df1.c > df2.d.
+                • ColumnExpressions containing FunctionExpressions which represent SQL functions
+                  invoked on DataFrame Columns.
+                  Examples:
+                    1. (df1.a.round(1) - df2.a.round(1)).mod(2.5) > 2
+                    2. df1.a.floor() - df2.b.floor() > 2
 
                 Types: str (or) ColumnExpression (or) List of strings(str) or ColumnExpressions
@@ -6211,7 +6537,7 @@ class DataFrame():
                 Specifies the suffix to be added to the right table columns.
                 Default Value: None.
                 Types: str
-
+
             lprefix:
                 Optional Argument.
                 Specifies the prefix to be added to the left table columns.
@@ -6261,7 +6587,7 @@ class DataFrame():
             0        2        2  analytics      2.3      2.3    b  analytics    b
             1        1        1   teradata      1.3      1.3    a   teradata    a
 
-            # Example 2: One "on" argument condition is ColumnExpression and other is string having two
+            # Example 2: One "on" argument condition is ColumnExpression and other is string having two
             #            columns with left outer join.
             >>> df1.join(df2, on = [df1.col2 == df2.col4,"col5 = col7"], how = "left", lprefix = "t1", rprefix = "t2")
               t1_col1  t2_col1       col2  t1_col3  t2_col3 col5       col4 col7
@@ -6275,7 +6601,7 @@ class DataFrame():
             0        2        2  analytics      2.3      2.3    b  analytics    b
             1        1        1   teradata      1.3      1.3    a   teradata    a
 
-            # Example 4: One "on" argument condition is ColumnExpression and other is string having two
+            # Example 4: One "on" argument condition is ColumnExpression and other is string having two
             #            columns with full join.
             >>> df1.join(other = df2, on = ["col2=col4",df1.col5 == df2.col7], how = "full", lprefix = "t1", rprefix = "t2")
               t1_col1  t2_col1       col2  t1_col3  t2_col3 col5       col4 col7
@@ -6353,7 +6679,53 @@ class DataFrame():
             3  Beginner  Beginner        1  3.95  Beginner  3.70    Novice         0         1        no       yes
             3  Beginner  Beginner        2  3.76  Beginner  3.70    Novice         0         1        no       yes
             3  Beginner    Novice        3  3.70  Beginner  3.70    Novice         1         1        no        no
+
+            # Example 10: Perform self join using aliased DataFrame.
+            # Create an aliased DataFrame.
+            >>> lhs = DataFrame("admissions_train").head(3).sort("id")
+            >>> rhs = lhs.alias("rhs")
+            # Use aliased DataFrame for self join.
+            >>> joined_df = lhs.join(other=rhs, how="cross", lprefix="l", rprefix="r")
+            >>> joined_df
+               l_id  r_id l_masters r_masters  l_gpa  r_gpa   l_stats   r_stats l_programming r_programming  l_admitted  r_admitted
+            0     1     3       yes        no   3.95   3.70  Beginner    Novice      Beginner      Beginner           0           1
+            1     2     2       yes       yes   3.76   3.76  Beginner  Beginner      Beginner      Beginner           0           0
+            2     2     3       yes        no   3.76   3.70  Beginner    Novice      Beginner      Beginner           0           1
+            3     3     1        no       yes   3.70   3.95    Novice  Beginner      Beginner      Beginner           1           0
+            4     3     3        no        no   3.70   3.70    Novice    Novice      Beginner      Beginner           1           1
+            5     3     2        no       yes   3.70   3.76    Novice  Beginner      Beginner      Beginner           1           0
+            6     2     1       yes       yes   3.76   3.95  Beginner  Beginner      Beginner      Beginner           0           0
+            7     1     2       yes       yes   3.95   3.76  Beginner  Beginner      Beginner      Beginner           0           0
+            8     1     1       yes       yes   3.95   3.95  Beginner  Beginner      Beginner      Beginner           0           0
+
+            # Example 11: Perform join with compound 'on' condition having
+            #             more than one binary operator.
+            >>> rhs_2 = lhs.assign(double_gpa=lhs.gpa * 2)
+            >>> joined_df_2 = lhs.join(rhs_2, on=rhs_2.double_gpa == lhs.gpa * 2, how="left", lprefix="l", rprefix="r")
+            >>> joined_df_2
+               l_id  r_id l_masters r_masters  l_gpa  r_gpa   l_stats   r_stats l_programming r_programming  l_admitted  r_admitted  double_gpa
+            0     3     3        no        no   3.70   3.70    Novice    Novice      Beginner      Beginner           1           1        7.40
+            1     2     2       yes       yes   3.76   3.76  Beginner  Beginner      Beginner      Beginner           0           0        7.52
+            2     1     1       yes       yes   3.95   3.95  Beginner  Beginner      Beginner      Beginner           0           0        7.90
+
+            # Example 12: Perform join on DataFrames with 'on' condition
+            #             having FunctionExpression.
+            >>> df = DataFrame("admissions_train")
+            >>> df2 = df.alias("rhs_df")
+            >>> joined_df_3 = df.join(df2, on=(df.gpa.round(1) - df2.gpa.round(1)).mod(2.5) > 2,
+            ...                       how="inner", lprefix="l")
+            >>> joined_df_3.sort(["id", "l_id"])
+               l_id  id l_masters masters  l_gpa   gpa   l_stats     stats l_programming programming  l_admitted  admitted
+            0     1  24       yes      no   3.95  1.87  Beginner  Advanced      Beginner      Novice           0         1
+            1    13  24        no      no   4.0   1.87  Advanced  Advanced        Novice      Novice           1         1
+            2    15  24       yes      no   4.0   1.87  Advanced  Advanced      Advanced      Novice           1         1
+            3    25  24        no      no   3.96  1.87  Advanced  Advanced      Advanced      Novice           1         1
+            4    27  24       yes      no   3.96  1.87  Advanced  Advanced      Advanced      Novice           0         1
+            5    29  24       yes      no   4.0   1.87    Novice  Advanced      Beginner      Novice           0         1
+            6    40  24       yes      no   3.95  1.87    Novice  Advanced      Beginner      Novice           0         1
+
         """
+
         # Argument validations
         awu_matrix = []
         awu_matrix.append(["other", other, False, (DataFrame)])
@@ -6367,17 +6739,11 @@ class DataFrame():
         # Validate argument types
         _Validators._validate_function_arguments(awu_matrix)
 
-        # If user has not provided suffix argument(s), then prefix argument(s) value(s) are passed by
-        # user hence we will set the affix variables (laffix and raffix) with provided value(s).
-        # affix_type is also set appropriately.
-        if lsuffix is not None or rsuffix is not None:
-            laffix = lsuffix
-            raffix = rsuffix
-            affix_type = "suffix"
-        else:
-            laffix = lprefix
-            raffix = rprefix
-            affix_type = "prefix"
+        # If self and other DataFrames are pointing to the same Table object,
+        # raise an error.
+        if self._metaexpr.t is other._metaexpr.t:
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_ALIAS_REQUIRED, "join"),
+                                      MessageCodes.TDMLDF_ALIAS_REQUIRED)
 
         how_lc = how.lower()
 
@@ -6395,12 +6761,33 @@ class DataFrame():
         for col in other.columns:
             other_columns_lower_actual_map[col.lower()] = col
 
-        for column in self_columns_lower_actual_map.keys():
-            if column in other_columns_lower_actual_map.keys():
-                if laffix is None and raffix is None:
-                    raise TeradataMlException(
-                        Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),
-                        MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
+        # Set the affix variables (laffix and raffix) with the provided value(s)
+        # of lsuffix, rsuffix, lprefix and rprefix.
+        # Also set affix_type appropriately.
+        laffix = None
+        raffix = None
+        affix_type = None
+        if lsuffix is not None or rsuffix is not None:
+            laffix = lsuffix
+            raffix = rsuffix
+            affix_type = "suffix"
+        elif lprefix is not None or rprefix is not None:
+            laffix = lprefix
+            raffix = rprefix
+            affix_type = "prefix"
+
+        # The same column names can be present in the two dataframes involved
+        # in a join operation in two cases:
+        # Case 1: Self join.
+        # Case 2: Two tables having common column names.
+        # In either case, at least one kind of affix is required to generate
+        # distinct column names in the resultant table. Throw an error if no
+        # affix is available.
+        if not set(self_columns_lower_actual_map.keys()).isdisjoint(other_columns_lower_actual_map.keys()):
+            if affix_type is None:
+                raise TeradataMlException(
+                    Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),
+                    MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
 
         # Both affixes should not be equal to perform join.
         if laffix == raffix and laffix is not None:
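
The two new guards above can be illustrated with a small standalone sketch: the identity test catches an un-aliased self join (alias() builds a fresh metaexpression, so its table object differs), and the disjointness test demands an affix whenever column names overlap. Names below are illustrative:

class Table:
    pass

t = Table()
lhs_t, rhs_t = t, t
print(lhs_t is rhs_t)        # True -> un-aliased self join, rejected
print(lhs_t is Table())      # False -> aliased DataFrame, join may proceed

lhs_cols = {"id", "gpa", "stats"}
rhs_cols = {"id", "masters"}
affix_type = None            # no lprefix/rprefix/lsuffix/rsuffix supplied
if not lhs_cols.isdisjoint(rhs_cols) and affix_type is None:
    print("error: overlapping columns", sorted(lhs_cols & rhs_cols),
          "require an affix")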
@@ -6409,115 +6796,158 @@ class DataFrame():
6409
6796
  "'l{affix_type}' and 'r{affix_type}'".format(affix_type=affix_type)),
6410
6797
  MessageCodes.TDMLDF_INVALID_TABLE_ALIAS)
6411
6798
 
6412
- if how_lc != "cross":
6413
- if isinstance(on, str) or isinstance(on, ColumnExpression):
6414
- on = [on]
6415
-
6416
- all_join_conditions = []
6417
- invalid_join_conditions = []
6418
- # Forming join condition
6419
- for condition in on:
6420
- ori_condition = condition
6421
-
6422
- if not isinstance(condition, (ColumnExpression, str)):
6423
- invalid_join_conditions.append(condition)
6424
- continue
6425
-
6426
- # Process only when the on condition is string or a ColumnExpression
6427
- if isinstance(condition, ColumnExpression):
6428
- columns = condition.original_column_expr
6429
- condition = condition.compile()
6430
-
6431
- for op in TeradataConstants.TERADATA_JOIN_OPERATORS.value:
6432
- if op in condition:
6433
- conditional_separator = op
6434
- break
6435
- else:
6436
- # If no join condition is mentioned, default is taken as equal.
6437
- # If on is ['a'], then it is equal to 'df1.a = df2.a'
6438
- columns = [condition, condition]
6439
- condition = "{0} = {0}".format(condition)
6440
- conditional_separator = "="
6441
-
6442
- if isinstance(ori_condition, str):
6443
- columns = [column.strip() for column in condition.split(sep=conditional_separator)
6444
- if len(column) > 0]
6445
-
6446
- if len(columns) != 2:
6447
- invalid_join_conditions.append(condition)
6448
- else:
6449
- left_col = self.__add_alias_to_column(columns[0], self, laffix if laffix is not None else "df1")
6450
- right_col = self.__add_alias_to_column(columns[1], other, raffix if raffix is not None else "df2")
6451
- if conditional_separator == "!=":
6452
- # "!=" is python way of expressing 'not equal to'. "<>" is Teradata way of
6453
- # expressing 'not equal to'. Adding support for "!=".
6454
- conditional_separator = "<>"
6455
- all_join_conditions.append('{0} {1} {2}'.format(left_col, conditional_separator, right_col))
6456
-
6457
- if len(invalid_join_conditions) > 0:
6458
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_JOIN_CONDITION,
6459
- ", ".join(invalid_join_conditions)),
6460
- MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
6461
-
6462
- join_condition = " and ".join(all_join_conditions)
6463
- else:
6464
- join_condition = ""
6465
-
6466
- df1_columns_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr)
6467
- df2_columns_types = df_utils._get_required_columns_types_from_metaexpr(other._metaexpr)
6468
-
6469
- select_columns = []
6470
- new_metaexpr_columns_types = OrderedDict()
6471
-
6472
- for column in self.columns:
6473
- if df_utils._check_column_exists(column.lower(), other_columns_lower_actual_map.keys()):
6474
- # Check if column found in other DataFrame has same case or different.
6475
- # Return the column name from the other DataFrame.
6476
- other_column = other_columns_lower_actual_map[column.lower()]
6477
-
6478
- df1_column_with_affix = self.__check_and_return_new_column_name(laffix, other_column,
6479
- other_columns_lower_actual_map.keys(),
6480
- "right", affix_type)
6481
- select_columns.append("{0} as {1}".format(
6482
- self.__get_fully_qualified_col_name(other_column, "df1" if laffix is None else laffix),
6483
- df1_column_with_affix))
6484
-
6485
- df2_column_with_affix = self.__check_and_return_new_column_name(raffix, column,
6486
- self_columns_lower_actual_map.keys(),
6487
- "left", affix_type)
6488
- select_columns.append("{0} as {1}".format(
6489
- self.__get_fully_qualified_col_name(column, "df2" if raffix is None else raffix),
6490
- df2_column_with_affix))
6491
-
6492
- # As we are creating new column name, adding it to new metadata dict for new dataframe from join.
6493
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
6494
- UtilFuncs._teradata_unquote_arg(df1_column_with_affix, "\""),
6495
- column, df1_columns_types)
6496
-
6497
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
6498
- UtilFuncs._teradata_unquote_arg(df2_column_with_affix, "\""),
6499
- other_column, df2_columns_types)
6500
-
6799
+ try:
6800
+ # Set an attribute named '_join_alias' to underlying SQLAlchemy table objects
6801
+ # and use it as default alias for compiling.
6802
+ setattr(self._metaexpr.t, "_join_alias", "lhs")
6803
+ setattr(other._metaexpr.t, "_join_alias", "rhs")
6804
+ lhs_alias = "lhs"
6805
+ rhs_alias = "rhs"
6806
+
6807
+ # Step 1: Generate the on clause string.
6808
+ if how_lc != "cross":
6809
+ on = UtilFuncs._as_list(on)
6810
+
6811
+ all_join_conditions = []
6812
+ invalid_join_conditions = []
6813
+ # Forming join condition
6814
+ for condition in on:
6815
+ # Process only when the on condition is either a string or a ColumnExpression.
6816
+ if not isinstance(condition, (ColumnExpression, str)):
6817
+ invalid_join_conditions.append(condition)
6818
+ continue
6819
+
6820
+ # Generate final on clause string from string representation of condition.
6821
+ if isinstance(condition, str):
6822
+ # Process the string manually.
6823
+ # 1. Parse the string to get operator.
6824
+ for op in TeradataConstants.TERADATA_JOIN_OPERATORS.value:
6825
+ if op in condition:
6826
+ conditional_separator = op
6827
+ break
6828
+ else:
6829
+ # If no join condition is mentioned, then string represents the column.
6830
+ # In this case, default operator is taken as equal.
6831
+ # If on is ['a'], then it is equal to 'lhs.a = rhs.a'
6832
+ columns = [condition, condition]
6833
+ condition = "{0} = {0}".format(condition)
6834
+ conditional_separator = "="
6835
+ # 2. Split the string using operator and extract LHS and RHS
6836
+ # columns from a binary expression.
6837
+ columns = [column.strip() for column in condition.split(sep=conditional_separator)
6838
+ if len(column) > 0]
6839
+
6840
+ if len(columns) != 2:
6841
+ invalid_join_conditions.append(condition)
6842
+ # TODO: Raise exception here only.
6843
+ else:
6844
+ # 3. Generate fully qualified names using affix and table alias
6845
+ # and create final on clause condition string.
6846
+ left_col = self.__add_alias_to_column(columns[0], self, lhs_alias)
6847
+ right_col = self.__add_alias_to_column(columns[1], other, rhs_alias)
6848
+ if conditional_separator == "!=":
6849
+ # "!=" is python way of expressing 'not equal to'. "<>" is Teradata way of
6850
+ # expressing 'not equal to'. Adding support for "!=".
6851
+ conditional_separator = "<>"
6852
+ all_join_conditions.append(
6853
+ '{0} {1} {2}'.format(left_col, conditional_separator, right_col))
6854
+
6855
+ # Generate on clause string from column expression.
6856
+ if isinstance(condition, ColumnExpression):
6857
+ compiled_condition = condition.compile(compile_kwargs={'include_table': True,
6858
+ 'literal_binds': True,
6859
+ 'table_name_kind': '_join_alias',
6860
+ 'compile_with_caller_table': True})
6861
+
6862
+ all_join_conditions.append(compiled_condition)
6863
+
6864
+ # Raise error if invalid on conditions are passed.
6865
+ if len(invalid_join_conditions) > 0:
6866
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_JOIN_CONDITION,
6867
+ ", ".join(invalid_join_conditions)),
6868
+ MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
6869
+
6870
+ # Generate final on condition.
6871
+ join_condition = " and ".join(all_join_conditions)
6501
6872
  else:
6502
- # As column not present in right DataFrame, directly adding column to new metadata dict.
6503
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, df1_columns_types)
6504
- select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
6873
+ # In case of cross join no need of condition.
6874
+ join_condition = ""
6505
6875
 
6506
- for column in other.columns:
6507
- if not df_utils._check_column_exists(column.lower(), self_columns_lower_actual_map.keys()):
6508
- # As column not present in left DataFrame, directly adding column to new metadata dict.
6509
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, df2_columns_types)
6510
- select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
6876
+ # Step 2: Generate the select clause string.
6877
+ # Generate new column names for overlapping column names using lsuffix, rsuffix, lprefix, rprefix.
6878
+ # Also, use table alias while addressing overlapping column names.
6879
+ lhs_columns_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr)
6880
+ rhs_columns_types = df_utils._get_required_columns_types_from_metaexpr(other._metaexpr)
6511
6881
 
6512
- # Create a node in AED using _aed_join
6513
- join_node_id = self._aed_utils._aed_join(self._nodeid, other._nodeid, ", ".join(select_columns), how_lc,
6514
- join_condition, "df1" if laffix is None else laffix,
6515
- "df2" if raffix is None else raffix)
6882
+ select_columns = []
6883
+ new_metaexpr_columns_types = OrderedDict()
6516
6884
 
6517
- # Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid and underlying table name.
6518
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(join_node_id, new_metaexpr_columns_types.items())
6885
+ # Processing columns in LHS DF/ self DF.
6886
+ for column in self.columns:
6887
+ if df_utils._check_column_exists(column.lower(), other_columns_lower_actual_map.keys()):
6888
+ # Check if column found in other DataFrame has same case or different.
6889
+ # Return the column name from the other DataFrame.
6890
+ other_column = other_columns_lower_actual_map[column.lower()]
6891
+
6892
+ # Check if column name in LHS dataframe is same as that of in RHS dataframe.
6893
+ # If so, generate new name for LHS DF column using provided affix.
6894
+ df1_column_with_affix = self.__check_and_return_new_column_name(laffix, other_column,
6895
+ other_columns_lower_actual_map.keys(),
6896
+ "right", affix_type)
6897
+
6898
+ # Generate select clause string for current column and append to list.
6899
+ select_columns.append("{0} as {1}".format(
6900
+ self.__get_fully_qualified_col_name(other_column, lhs_alias),
6901
+ df1_column_with_affix))
6902
+
6903
+ # Check if column name in RHS dataframe is same as that of in LHS dataframe.
6904
+ # If so, generate new name for RHS DF column using provided affix.
6905
+ df2_column_with_affix = self.__check_and_return_new_column_name(raffix, column,
6906
+ self_columns_lower_actual_map.keys(),
6907
+ "left", affix_type)
6908
+ # Generate select clause string for current column and append to list.
6909
+ select_columns.append("{0} as {1}".format(
6910
+ self.__get_fully_qualified_col_name(column, rhs_alias),
6911
+ df2_column_with_affix))
6912
+
6913
+ # As we are creating new column name, adding it to new metadata dict for new dataframe from join.
6914
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
6915
+ UtilFuncs._teradata_unquote_arg(df1_column_with_affix, "\""),
6916
+ column, lhs_columns_types)
6917
+
6918
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
6919
+ UtilFuncs._teradata_unquote_arg(df2_column_with_affix, "\""),
6920
+ other_column, rhs_columns_types)
6519
6921
 
6520
- return self._create_dataframe_from_node(join_node_id, new_metaexpr, self._index_label)
6922
+ else:
6923
+ # As column with same name is not present in RHS DataFrame now,
6924
+ # directly adding column to new metadata dict.
6925
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, lhs_columns_types)
6926
+ select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
6927
+
6928
+ # Processing columns in RHS DF/ other DF.
6929
+ # Here we will only be processing columns which are not overlapping.
6930
+ for column in other.columns:
6931
+ if not df_utils._check_column_exists(column.lower(), self_columns_lower_actual_map.keys()):
6932
+ # As column not present in left DataFrame, directly adding column to new metadata dict.
6933
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, rhs_columns_types)
6934
+ select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
6935
+
6936
+ # Step 3: Create a node in AED using _aed_join using appropriate alias for involved tables.
6937
+ join_node_id = self._aed_utils._aed_join(self._nodeid, other._nodeid, ", ".join(select_columns),
6938
+ how_lc, join_condition, lhs_alias, rhs_alias)
6939
+
6940
+ # Step 4: Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid
6941
+ # and underlying table name.
6942
+ new_metaexpr = UtilFuncs._get_metaexpr_using_columns(join_node_id, new_metaexpr_columns_types.items())
6943
+
6944
+ # Return a new joined dataframe.
6945
+ return self._create_dataframe_from_node(join_node_id, new_metaexpr, self._index_label)
6946
+ finally:
6947
+ # Delete the '_join_alias' attribute attached to underlying
6948
+ # SQLALchemy table objects.
6949
+ delattr(self._metaexpr.t, "_join_alias")
6950
+ delattr(other._metaexpr.t, "_join_alias")
6521
6951
 
6522
6952
  def __add_alias_to_column(self, column, df, alias):
6523
6953
  """
@@ -6577,7 +7007,7 @@ class DataFrame():
         return "{0}.{1}".format(UtilFuncs._teradata_quote_arg(alias, "\"", False),
                                 UtilFuncs._teradata_quote_arg(column, "\"", False))
 
-    def __check_and_return_new_column_name(self, affix, column, col_list, df_side, affix_type):
+    def __check_and_return_new_column_name(self, affix, column, col_list, other_df_side, affix_type):
         """
         Check whether the new column name (column with affix applied) already exists in col_list.
         If it does, raise an exception; otherwise return the new column name.
@@ -6586,7 +7016,7 @@ class DataFrame():
             affix - affix to be added to the column.
             column - column name.
             col_list - list of columns to check for the new column name.
-            df_side - Side of the dataframe.
+            other_df_side - Side on which the other dataframe in the current join operation resides.
             affix_type - Type of affix. Either "prefix" or "suffix".
 
         EXAMPLES:
@@ -6600,19 +7030,19 @@ class DataFrame():
             return UtilFuncs._teradata_quote_arg(column, "\"", False)
 
         # If "prefix", the affix is prepended to the column name; otherwise it is appended.
-        df1_column_with_affix = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
-        df1_column_with_affix = df1_column_with_affix.format(affix,
-                                                             UtilFuncs._teradata_unquote_arg(column, "\""))
-        if df_utils._check_column_exists(df1_column_with_affix.lower(), col_list):
-            if df_side == "right":
-                suffix_side = "l{}".format(affix_type)
+        column_with_affix = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
+        column_with_affix = column_with_affix.format(affix,
+                                                     UtilFuncs._teradata_unquote_arg(column, "\""))
+        if df_utils._check_column_exists(column_with_affix.lower(), col_list):
+            if other_df_side == "right":
+                affix_type = "l{}".format(affix_type)
             else:
-                suffix_side = "r{}".format(affix_type)
+                affix_type = "r{}".format(affix_type)
             raise TeradataMlException(
-                Messages.get_message(MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS, df1_column_with_affix, df_side,
-                                     suffix_side),
+                Messages.get_message(MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS, column_with_affix, other_df_side,
+                                     affix_type),
                 MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS)
-        return UtilFuncs._teradata_quote_arg(df1_column_with_affix, "\"", False)
+        return UtilFuncs._teradata_quote_arg(column_with_affix, "\"", False)
 
     def __add_column_type_item_to_dict(self, new_metadata_dict, new_column, column, column_types):
         """
@@ -7108,6 +7538,184 @@ class DataFrame():
         if function_name is None or function_name in VANTAGE_FUNCTION_ARGTYPE_DEPENDENT_MAPPER:
             self.__execute_node_and_set_table_name(self._nodeid)
         return True
+
+    def _assign_udf(self, udf_expr):
+        """
+        DESCRIPTION:
+            Internal function for DataFrame.assign() to execute the udf using
+            the Script Table Operator and create new columns for the teradataml
+            DataFrame.
+
+        PARAMETER:
+            udf_expr:
+                Required Argument.
+                Specifies a dictionary of column name to UDF expressions.
+                Types: dict
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            self._assign_udf(udf_expr)
+        """
+        df = self
+        env_name = None
+        # Create a dictionary of env_name to the list of output columns to be run on that env.
+        env_mapper = OrderedDict()
+
+        exec_mode = 'REMOTE' if UtilFuncs._is_lake() else 'IN-DB'
+        if exec_mode == 'REMOTE':
+            _Validators._check_auth_token("udf")
+            for colname, col in udf_expr.items():
+                env_name = UtilFuncs._get_env_name(col)
+                # Store the env_name and its corresponding output column.
+                if env_name in env_mapper:
+                    env_mapper[env_name].append(colname)
+                else:
+                    env_mapper[env_name] = [colname]
+        else:
+            env_mapper[env_name] = udf_expr.keys()
+
+        for env_name, cols in env_mapper.items():
+            # Create a dictionary of output columns to column types.
+            returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
+            # Store the udf functions.
+            user_function = []
+            # Create a dictionary of output column name to udf name.
+            columns_definitions = {}
+            # Create a dictionary of output column name to udf arguments.
+            function_args = {}
+            for colname, col in udf_expr.items():
+                delimiter = col._delimiter
+                quotechar = col._quotechar
+                if colname in cols:
+                    user_function.append(col._udf)
+                    function_args[colname] = col._udf_args if col._udf_args else ()
+                    returns[colname] = col.type
+                    columns_definitions[colname] = col._udf.__name__
+
+            tbl_operators = _TableOperatorUtils([],
+                                                df,
+                                                "udf",
+                                                user_function,
+                                                exec_mode,
+                                                chunk_size=None,
+                                                returns=returns,
+                                                delimiter=delimiter,
+                                                quotechar=quotechar,
+                                                num_rows=1,
+                                                auth=None,
+                                                data_partition_column=None,
+                                                data_hash_column=None,
+                                                data_order_column=None,
+                                                is_local_order=None,
+                                                nulls_first=None,
+                                                sort_ascending=None,
+                                                charset=None,
+                                                env_name=env_name,
+                                                style="csv",
+                                                function_args=function_args,
+                                                columns_definitions=columns_definitions,
+                                                output_type_converters={
+                                                    col_name: _Dtypes._teradata_type_to_python_type(col_type)
+                                                    for col_name, col_type in returns.items()})
+
+            df = tbl_operators.execute()
+        return df
+
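On Lake, _assign_udf buckets the requested output columns by the environment each UDF runs in, then issues one table-operator call per environment. The bucketing itself is ordinary ordered-dict grouping; a minimal sketch, with env_of standing in for UtilFuncs._get_env_name:

from collections import OrderedDict

# Sketch: group output column names by the env their UDF should run in,
# preserving insertion order, as the REMOTE branch above does.
def group_by_env(udf_exprs, env_of):
    env_mapper = OrderedDict()
    for colname, col in udf_exprs.items():
        env_mapper.setdefault(env_of(col), []).append(colname)
    return env_mapper

exprs = {"upper_stats": "udf_a", "len_sum": "udf_b"}
print(group_by_env(exprs, env_of=lambda col: "test_udf"))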
+    def _assign_call_udf(self, call_udf_expr):
+        """
+        DESCRIPTION:
+            Internal function for DataFrame.assign() to execute the call_udf using
+            the Script/Apply Table Operator and create new columns for the teradataml
+            DataFrame.
+
+        PARAMETER:
+            call_udf_expr:
+                Required Argument.
+                Specifies a dictionary of column name to call_udf expressions.
+                Types: dict
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            # call_udf_expr is a dictionary of column names to call_udf expressions.
+            call_udf_expr = {'upper_col': <teradataml.dataframe.sql._SQLColumnExpression object at 0x0000028E59C44310>,
+                             'sum_col': <teradataml.dataframe.sql._SQLColumnExpression object at 0x0000028E59C41690>}
+            self._assign_call_udf(call_udf_expr)
+        """
+        df = self
+        # Create a dictionary of output columns to column types (teradata types).
+        returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
+        # Create a dictionary of output columns to column types (python types).
+        output_type_converters = {col_name: _Dtypes._teradata_type_to_python_type(col_type)
+                                  for col_name, col_type in returns.items()}
+
+        for colname, col in call_udf_expr.items():
+            returns[colname] = col.type
+            output_type_converters[colname] = _Dtypes._teradata_type_to_python_type(col.type)
+            script_name = col._udf_script
+            delimiter = col._delimiter
+            quotechar = col._quotechar
+
+        # Create a dictionary of arguments to be passed to the script.
+        script_data = {}
+        script_data['input_cols'] = df.columns
+        script_data['output_cols'] = list(returns.keys())
+        script_data['output_type_converters'] = output_type_converters
+        script_data['function_args'] = {colname: col._udf_args}
+        script_data['delimiter'] = delimiter
+        script_data['quotechar'] = quotechar
+
+        # Convert the dictionary to a string.
+        # The string is URL encoded so that it can be passed as a parameter to the script.
+        script_data = urllib.parse.quote_plus(json.dumps(script_data))
+
+        if UtilFuncs._is_lake():
+            from teradataml.table_operators.Apply import Apply
+            apply_op_obj = Apply(data=df,
+                                 script_name=script_name,
+                                 env_name=col._env_name,
+                                 returns=returns,
+                                 delimiter=delimiter,
+                                 quotechar=quotechar,
+                                 files_local_path=GarbageCollector._get_temp_dir_name(),
+                                 apply_command="python3 {} {}".format(script_name, script_data))
+            try:
+                df = apply_op_obj.execute_script(
+                    output_style=OutputStyle.OUTPUT_TABLE.value)
+            except Exception:
+                raise
+        else:
+            import teradataml.context.context as context
+            database = context._get_current_databasename()
+
+            # Check for reserved keywords only when new output columns are being added.
+            check_reserved_keyword = sorted(list(returns.keys())) != sorted(df.columns)
+
+            from teradataml.table_operators.Script import Script
+            table_op_obj = Script(data=df,
+                                  script_name=script_name,
+                                  files_local_path=GarbageCollector._get_temp_dir_name(),
+                                  script_command="{}/bin/python3 ./{}/{} {}".format(
+                                      configure.indb_install_location, database, script_name, script_data),
+                                  returns=returns,
+                                  quotechar=quotechar,
+                                  delimiter=delimiter)
+            table_op_obj.check_reserved_keyword = check_reserved_keyword
+            try:
+                df = table_op_obj.execute_script(
+                    output_style=OutputStyle.OUTPUT_TABLE.value)
+            except Exception:
+                raise
+        return df
 
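_assign_call_udf ships the column metadata to the registered script as a single URL-encoded JSON argument on the command line. The round-trip is plain standard library; the receiving script would presumably reverse it as below:

import json
import urllib.parse

# Driver side: serialize the script arguments into one shell-safe CLI token.
script_data = {"input_cols": ["accounts", "Feb"],
               "output_cols": ["accounts", "Feb", "upper_col"],
               "delimiter": ","}
encoded = urllib.parse.quote_plus(json.dumps(script_data))

# Script side: decode the token back into the original dictionary.
decoded = json.loads(urllib.parse.unquote_plus(encoded))
assert decoded == script_data
print(encoded[:40], "...")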
 
     @collect_queryband(queryband="DF_assign")
     def assign(self, drop_columns=False, **kwargs):
@@ -7119,10 +7727,12 @@ class DataFrame():
             drop_columns:
                 Optional Argument.
                 If True, drop columns that are not specified in assign.
-                Note:
-                    When DataFrame.assign() is run on DataFrame.groupby(), this argument
-                    is ignored. In such cases, all columns are dropped and only new columns
-                    and grouping columns are returned.
+                Notes:
+                    1. When DataFrame.assign() is run on DataFrame.groupby(), this argument
+                       is ignored. In such cases, all columns are dropped and only new
+                       columns and grouping columns are returned.
+                    2. This argument is ignored for UDF functions.
+
                 Default Value: False
                 Types: bool
 
@@ -7138,6 +7748,7 @@ class DataFrame():
                 * SQLAlchemy ClauseElements.
                   (See teradataml extension with SQLAlchemy in teradataml User Guide
                   and Function reference guide for more details)
+                * Function - udf, call_udf.
 
 
         RETURNS:
@@ -7163,6 +7774,16 @@ class DataFrame():
                used, but the column used in such function must be a part of group by columns.
                See examples for teradataml extension with SQLAlchemy on using various
                functions with DataFrame.assign().
+            6. UDF expressions can run both on Vantage Cloud Lake, leveraging the Apply
+               Table Operator of the Open Analytics Framework, and on Enterprise,
+               leveraging Vantage's Script Table Operator.
+            7. One can pass both regular expressions and udf expressions to this API.
+               However, regular expressions are computed first, followed by udf
+               expressions, and the resulting columns are ordered accordingly.
+               Look at Example 18 to understand more.
+            8. While passing multiple udf expressions, the output of one column cannot
+               be used as the input of another column in the same ``assign`` call.
+            9. If multiple udf expressions are passed, the delimiter and quotechar
+               specified in the last udf expression are considered for processing.
 
         RAISES:
             1. ValueError - When a callable is passed as a value, or columns from different
@@ -7424,6 +8045,158 @@ class DataFrame():
             1   Advanced  2.886226  3.508750      84.21
             2     Novice  6.377775  3.559091      39.15
             >>>
+
+            #
+            # Executing user defined functions (UDFs) with assign()
+            #
+            # Example 15: Create two user defined functions, 'to_upper' and 'sum':
+            #             'to_upper' converts the values in 'accounts' to upper case, and
+            #             'sum' adds the length of the string values in column 'accounts'
+            #             to column 'Feb' and stores the result in an Integer type column.
+            >>> @udf
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            >>> from teradatasqlalchemy.types import INTEGER
+            >>> @udf(returns=INTEGER())
+            ... def sum(x, y):
+            ...     return len(x)+y
+            >>>
+            # Assign both Column Expressions returned by the user defined functions
+            # to the DataFrame.
+            >>> res = df.assign(upper_stats = to_upper('accounts'), len_sum = sum('accounts', 'Feb'))
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime upper_stats  len_sum
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC       98
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC      207
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC      100
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC      209
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC      220
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO      218
+            >>>
+
+            # Example 16: Create a user defined function that adds 4 days to the 'datetime'
+            #             column and stores the result in a DATE type column.
+            >>> from teradatasqlalchemy.types import DATE
+            >>> import datetime
+            >>> @udf(returns=DATE())
+            ... def add_date(x, y):
+            ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y)).strftime("%y/%m/%d")
+            >>>
+            # Assign the Column Expression returned by the user defined function
+            # to the DataFrame.
+            >>> res = df.assign(new_date = add_date('datetime', 4))
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime  new_date
+            accounts
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08
+            >>>
+
+            # Example 17: Create a user defined function 'to_upper' to convert the values
+            #             in 'accounts' to upper case, and create a new column with a
+            #             string literal value.
+            >>> @udf
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            # Assign both expressions to the DataFrame.
+            >>> res = df.assign(upper_stats = to_upper('accounts'), new_col = 'string')
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime new_col upper_stats
+            accounts
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04  string    ALPHA CO
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04  string    BLUE INC
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  string  YELLOW INC
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04  string   JONES LLC
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04  string     RED INC
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  string  ORANGE INC
+            >>>
+
+            # Example 18: Create two user defined functions, 'to_upper' and 'sum', and also
+            #             create new columns from a string literal value and an arithmetic
+            #             operation on column 'Feb'.
+            >>> @udf
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            >>> from teradatasqlalchemy.types import INTEGER
+            >>> @udf(returns=INTEGER())
+            ... def sum(x, y):
+            ...     return len(x)+y
+            >>>
+            # Assign all expressions to the DataFrame.
+            >>> res = df.assign(upper_stats = to_upper('accounts'), new_col = 'abc',
+            ...                 len_sum = sum('accounts', 'Feb'), col_sum = df.Feb+1)
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime  col_sum new_col upper_stats  len_sum
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04     91.0     abc    BLUE INC       98
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04    211.0     abc    ALPHA CO      218
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04    201.0     abc   JONES LLC      209
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04     91.0     abc  YELLOW INC      100
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04    211.0     abc  ORANGE INC      220
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04    201.0     abc     RED INC      207
+            >>>
+
+            # Example 19: Convert the values in the 'accounts' column to upper case using a
+            #             user defined function on Vantage Cloud Lake.
+            # Create a Python 3.10.5 environment with the given name and description in Vantage.
+            >>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
+            User environment 'test_udf' created.
+            >>>
+            # Create a user defined function 'to_upper' to convert the values to upper case,
+            # passing the user env to run it on.
+            >>> from teradataml.dataframe.functions import udf
+            >>> @udf(env_name = env)
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            # Assign the Column Expression returned by the user defined function
+            # to the DataFrame.
+            >>> df.assign(upper_stats = to_upper('accounts'))
+                          Feb    Jan    Mar    Apr  datetime upper_stats
+            accounts
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC
+            >>>
+
+            # Example 20: Register and call a user defined function to convert the values
+            #             to upper case.
+            >>> from teradataml.dataframe.functions import udf, register, call_udf
+            >>> @udf
+            ... def to_upper(s):
+            ...     if s is not None:
+            ...         return s.upper()
+            >>>
+            # Register the created user defined function with the name "upper".
+            >>> register("upper", to_upper)
+            >>>
+            # Call the user defined function registered with the name "upper" and assign
+            # the ColumnExpression returned to the DataFrame.
+            >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
+            >>> res
+                          Feb    Jan    Mar    Apr  datetime   upper_col
+            accounts
+            Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO
+            Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC
+            Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC
+            Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC
+            Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC
+            Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC
+            >>>
         """
         # Argument validations
         awu_matrix = []
@@ -7469,13 +8242,42 @@ class DataFrame():
             msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
             raise TeradataMlException(msg, MessageCodes.TDMLDF_INFO_ERROR)
 
-        try:
-            (new_meta, new_nodeid) = self._generate_assign_metaexpr_aed_nodeid(drop_columns, **kwargs)
-            return self._create_dataframe_from_node(new_nodeid, new_meta, self._index_label)
-        except Exception as err:
-            errcode = MessageCodes.TDMLDF_INFO_ERROR
-            msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
-            raise TeradataMlException(msg, errcode) from err
+        # Create dictionaries mapping column names to udf expressions,
+        # call_udf expressions and normal/regular expressions.
+        udf_expr = {}
+        regular_expr = {}
+        call_udf_expr = {}
+        for colname, col in kwargs.items():
+            # If the value passed in kwargs is a ColumnExpression and is a udf, store it.
+            if isinstance(col, ColumnExpression) and col._udf:
+                udf_expr[colname] = col
+            # If the value passed in kwargs is a ColumnExpression and is a registered udf script, store it.
+            elif isinstance(col, ColumnExpression) and col._udf_script:
+                call_udf_expr[colname] = col
+            else:
+                regular_expr[colname] = col
+        df = self
+
+        # If kwargs contains both regular and udf expressions, first create new columns
+        # from the normal/regular expressions, then create new columns from the udf
+        # expressions on the resulting DataFrame.
+        if bool(regular_expr):
+            try:
+                (new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(drop_columns, **regular_expr)
+                df = df._create_dataframe_from_node(new_nodeid, new_meta, df._index_label)
+            except Exception as err:
+                errcode = MessageCodes.TDMLDF_INFO_ERROR
+                msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
+                raise TeradataMlException(msg, errcode) from err
+
+        if bool(udf_expr):
+            df = df._assign_udf(udf_expr)
+
+        if bool(call_udf_expr):
+            df = df._assign_call_udf(call_udf_expr)
+
+        return df
+
 
     @collect_queryband(queryband="DF_get")
     def get(self, key):
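The reworked assign() splits kwargs into three buckets and applies them in a fixed order: regular expressions first, then udf expressions, then call_udf expressions (which is why note 7 above can guarantee the resulting column order). A stripped-down sketch of the partitioning, with simple predicates standing in for the ColumnExpression attribute checks:

# Sketch: partition assign() kwargs into regular, udf and call_udf buckets,
# then apply them in that fixed order.
def partition_exprs(kwargs, is_udf, is_udf_script):
    udf_expr, call_udf_expr, regular_expr = {}, {}, {}
    for colname, col in kwargs.items():
        if is_udf(col):
            udf_expr[colname] = col
        elif is_udf_script(col):
            call_udf_expr[colname] = col
        else:
            regular_expr[colname] = col
    return regular_expr, udf_expr, call_udf_expr

regular, udf_, call_udf_ = partition_exprs(
    {"col_sum": "Feb+1", "upper_stats": "@udf to_upper", "upper_col": "registered upper"},
    is_udf=lambda c: c.startswith("@udf"),
    is_udf_script=lambda c: c.startswith("registered"))
print(regular, udf_, call_udf_)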
@@ -9110,6 +9912,12 @@ class DataFrame():
         # Validate argument types
         _Validators._validate_function_arguments(awu_matrix)
 
+        # If the self and right DataFrames point to the same Table object,
+        # raise an error.
+        if self._metaexpr.t is right._metaexpr.t:
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_ALIAS_REQUIRED, "merge"),
+                                      MessageCodes.TDMLDF_ALIAS_REQUIRED)
+
         if (right_on is not None and left_on is None) or (right_on is None and left_on is not None):
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.MUST_PASS_ARGUMENT, "left_on", "right_on"),
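Note that the new guard uses object identity (`is`), not equality: it fires only when both sides of the merge wrap the very same underlying Table object, as in a direct self-merge, while two independently constructed DataFrames over the same table still pass. A plain-Python illustration of that distinction:

# Sketch: identity check on a shared object, as used by the merge guard above.
class Table:
    def __init__(self, name):
        self.name = name

t = Table("admissions_train")
left_meta, right_meta = t, t             # both DataFrames share one Table object
print(left_meta is right_meta)           # True  -> alias required, merge raises

right_meta = Table("admissions_train")   # a fresh object for the same table name
print(left_meta is right_meta)           # False -> merge proceeds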
@@ -10107,7 +10915,8 @@ class DataFrame():
                                                test_size=list_of_fracs[1],
                                                stratify_column=stratify_column,
                                                seed=seed,
-                                               persist=True)
+                                               persist=True,
+                                               display_table_name=False)
 
         # Retrieve the table name from TrainTestSplit_out object.
         table_name = TrainTestSplit_out.result._table_name
@@ -10218,10 +11027,10 @@ class DataFrame():
 
         # Make this non-lazy. Added this in order to fix https://teradata-pe.atlassian.net/browse/ELE-6368
         # Cannot use __execute_node_and_set_table_name because self points to original df.
-        # Hence, setting the __table_name with _execute_node_return_db_object_name.
+        # Hence, setting the _table_name with _execute_node_return_db_object_name.
 
         df = self._create_dataframe_from_node(sample_node_id, new_metaexpr, self._index_label)
-        df.__table_name = df_utils._execute_node_return_db_object_name(sample_node_id, new_metaexpr)
+        df._table_name = df_utils._execute_node_return_db_object_name(sample_node_id, new_metaexpr)
 
         return df
 
@@ -10352,26 +11161,14 @@ class DataFrame():
                where admitted > 0) as temp_table SAMPLE 0.9'
 
         """
+        # Argument validations
+        awu_matrix = []
+        awu_matrix.append(["full_query", full_query, False, (bool)])
+        # Validate argument types
+        _Validators._validate_function_arguments(awu_matrix)
 
         try:
-            # Argument validations
-            awu_matrix = []
-            awu_matrix.append(["full_query", full_query, False, (bool)])
-            # Validate argument types
-            _Validators._validate_function_arguments(awu_matrix)
-
-            node_id = self._nodeid
-
-            if isinstance(self, (DataFrameGroupBy, DataFrameGroupByTime)):
-                # If the dataframe is of type groupby or groupbytime,
-                # get its parent dataframe's nodeid and return the queries
-                # for that node.
-                node_id = self._aed_utils._aed_get_parent_nodeids(self._nodeid)[0]
-
-            queries = self._aed_utils._aed_show_query(node_id, query_with_reference_to_top=full_query)
-
-            return queries[0][0]
-
+            return self.__generate_aed_query(full_query)
         except TeradataMlException:
             raise
 
@@ -10381,7 +11178,7 @@ class DataFrame():
         except Exception as err:
             errcode = MessageCodes.TDMLDF_INFO_ERROR
             msg = Messages.get_message(errcode)
-            raise TeradataMlException(msg, errcode) from err
+            raise TeradataMlException(msg, errcode) from err
 
     @collect_queryband(queryband="DF_mapRow")
     def map_row(self,
@@ -11899,6 +12696,9 @@ class DataFrame():
             _Validators._validate_column_exists_in_dataframe(column_names, self._metaexpr,
                                                              False)
             column_names = list(dict.fromkeys(column_names))
+
+            if list_td_reserved_keywords(column_names):
+                column_names = UtilFuncs._teradata_quote_arg(column_names, "\"", False)
 
             col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, column_names)
             sel_nodeid = self._aed_utils._aed_select(self._nodeid, ','.join(column_names), True)
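The select path above now double-quotes the column names whenever any of them collides with a Teradata reserved keyword, keeping the generated SQL valid. A toy version of that guard, using a small illustrative keyword subset rather than the real list_td_reserved_keywords list:

# Sketch: quote all column names if any collides with a reserved keyword.
RESERVED = {"year", "title", "value"}   # illustrative subset only

def quote_if_reserved(column_names):
    if any(c.lower() in RESERVED for c in column_names):
        return ['"{}"'.format(c) for c in column_names]
    return column_names

print(quote_if_reserved(["id", "year"]))   # ['"id"', '"year"']
print(quote_if_reserved(["id", "name"]))   # ['id', 'name']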
@@ -13840,7 +14640,7 @@ class DataFrame():
                 Types: int OR NoneType
 
         RETURNS:
-            iterator, an object to iterate over namedtuples for each row in the DataFrame.
+            iterator, an object to iterate over rows in the DataFrame.
 
         RAISES:
             None
@@ -13889,9 +14689,10 @@ class DataFrame():
         cur = execute_sql(query)
 
         if name:
+            columns = [column[0] for column in cur.description]
             for rec in cur:
-                Row = namedtuple(name, [column[0] for column in cur.description])
-                yield Row(*rec)
+                row = _Row(columns=columns, values=rec)
+                yield row
         else:
             for rec in cur:
                 yield rec
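The itertuples change computes the column list once and yields lightweight _Row objects instead of minting a fresh namedtuple class on every iteration. _Row's internals are not part of this diff; the sketch below uses an illustrative stand-in to show the design choice:

# Sketch of the design choice: the old loop created a new namedtuple *class*
# per row; hoisting the column list out of the loop and reusing a light
# container avoids that per-row class creation. This Row is a stand-in,
# not the library's _Row.
class Row:
    __slots__ = ("_columns", "_values")

    def __init__(self, columns, values):
        self._columns, self._values = columns, values

    def __getattr__(self, item):
        # Resolve attribute access by position in the column list.
        try:
            return self._values[self._columns.index(item)]
        except ValueError:
            raise AttributeError(item)

description = [("accounts",), ("Feb",)]           # shaped like cursor.description
columns = [column[0] for column in description]   # computed once, outside the loop
for rec in [("Alpha Co", 210.0), ("Blue Inc", 90.0)]:
    print(Row(columns, rec).accounts)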