teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +182 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +8 -13
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +60 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +12 -8
  17. teradataml/clients/keycloak_client.py +165 -0
  18. teradataml/common/constants.py +71 -26
  19. teradataml/common/exceptions.py +32 -0
  20. teradataml/common/messagecodes.py +28 -0
  21. teradataml/common/messages.py +13 -4
  22. teradataml/common/sqlbundle.py +3 -2
  23. teradataml/common/utils.py +345 -45
  24. teradataml/context/context.py +259 -93
  25. teradataml/data/apriori_example.json +22 -0
  26. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  27. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  28. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  29. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  30. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  31. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  32. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  34. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  35. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  37. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  38. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  39. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  40. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  41. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  42. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  43. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  46. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  49. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  50. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  51. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  53. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  54. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  55. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  56. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  57. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  58. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  59. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  60. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  61. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  62. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  63. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  65. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  66. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  67. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  68. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  69. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  70. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  71. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  72. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  73. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  74. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  75. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  76. teradataml/data/ner_dict.csv +8 -0
  77. teradataml/data/ner_input_eng.csv +7 -0
  78. teradataml/data/ner_rule.csv +5 -0
  79. teradataml/data/pattern_matching_data.csv +11 -0
  80. teradataml/data/pos_input.csv +40 -0
  81. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  82. teradataml/data/tdnerextractor_example.json +14 -0
  83. teradataml/data/teradataml_example.json +21 -1
  84. teradataml/data/textmorph_example.json +5 -0
  85. teradataml/data/to_num_data.csv +4 -0
  86. teradataml/data/tochar_data.csv +5 -0
  87. teradataml/data/trans_dense.csv +16 -0
  88. teradataml/data/trans_sparse.csv +55 -0
  89. teradataml/data/url_data.csv +10 -9
  90. teradataml/dataframe/copy_to.py +38 -27
  91. teradataml/dataframe/data_transfer.py +61 -45
  92. teradataml/dataframe/dataframe.py +1110 -132
  93. teradataml/dataframe/dataframe_utils.py +73 -27
  94. teradataml/dataframe/functions.py +1070 -9
  95. teradataml/dataframe/sql.py +750 -959
  96. teradataml/dbutils/dbutils.py +33 -13
  97. teradataml/dbutils/filemgr.py +14 -10
  98. teradataml/hyperparameter_tuner/utils.py +4 -2
  99. teradataml/lib/aed_0_1.dll +0 -0
  100. teradataml/opensource/_base.py +12 -157
  101. teradataml/options/configure.py +24 -9
  102. teradataml/scriptmgmt/UserEnv.py +317 -39
  103. teradataml/scriptmgmt/lls_utils.py +456 -135
  104. teradataml/sdk/README.md +79 -0
  105. teradataml/sdk/__init__.py +4 -0
  106. teradataml/sdk/_auth_modes.py +422 -0
  107. teradataml/sdk/_func_params.py +487 -0
  108. teradataml/sdk/_json_parser.py +453 -0
  109. teradataml/sdk/_openapi_spec_constants.py +249 -0
  110. teradataml/sdk/_utils.py +236 -0
  111. teradataml/sdk/api_client.py +897 -0
  112. teradataml/sdk/constants.py +62 -0
  113. teradataml/sdk/modelops/__init__.py +98 -0
  114. teradataml/sdk/modelops/_client.py +406 -0
  115. teradataml/sdk/modelops/_constants.py +304 -0
  116. teradataml/sdk/modelops/models.py +2308 -0
  117. teradataml/sdk/spinner.py +107 -0
  118. teradataml/store/__init__.py +1 -1
  119. teradataml/table_operators/Apply.py +16 -1
  120. teradataml/table_operators/Script.py +20 -1
  121. teradataml/table_operators/query_generator.py +4 -21
  122. teradataml/table_operators/table_operator_util.py +58 -9
  123. teradataml/utils/dtypes.py +4 -2
  124. teradataml/utils/internal_buffer.py +22 -2
  125. teradataml/utils/utils.py +0 -1
  126. teradataml/utils/validators.py +318 -58
  127. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
  128. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
  129. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
  130. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
  131. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
@@ -12,63 +12,72 @@ This file implements the teradataml dataframe.
12
12
  A teradataml dataframe maps virtually to teradata tables and views.
13
13
  """
14
14
  import decimal
15
- import inspect, itertools
15
+ import inspect
16
+ import itertools
16
17
  import json
17
18
  import numbers
18
- import pandas as pd
19
19
  import re
20
- import sqlalchemy
21
20
  import sys
22
21
  import urllib.parse
22
+ from collections import OrderedDict
23
+ from collections.abc import Iterator
23
24
 
25
+ import numpy as np
26
+ import pandas as pd
27
+ import sqlalchemy
24
28
  from sqlalchemy import Column
29
+ from sqlalchemy.exc import NoSuchColumnError
30
+ from sqlalchemy.sql import ClauseElement
31
+ from teradatasql import OperationalError
32
+ from teradatasqlalchemy.dialect import dialect as td_dialect
33
+ from teradatasqlalchemy.dialect import preparer
34
+ from teradatasqlalchemy.types import (BIGINT, BYTEINT, DECIMAL, FLOAT, INTEGER,
35
+ PERIOD_TIMESTAMP, SMALLINT, _TDType)
25
36
 
26
37
  import teradataml.context.context as tdmlctx
27
-
28
- from collections import OrderedDict, namedtuple
29
- from sqlalchemy.sql import ClauseElement
30
- from teradataml import execute_sql
31
- from teradataml import GarbageCollector
32
- from teradataml.dataframe.sql import _MetaExpression
33
- from teradataml.dataframe.sql_interfaces import ColumnExpression
34
- from teradataml.dataframe.sql_functions import case
35
- from teradataml.series.series import Series
36
- from teradatasqlalchemy.types import _TDType, BIGINT, INTEGER, PERIOD_TIMESTAMP, SMALLINT, BYTEINT, FLOAT, DECIMAL
37
- from teradataml.common.deprecations import argument_deprecation
38
- from teradataml.common.utils import UtilFuncs
38
+ from teradataml import GarbageCollector, execute_sql
39
+ from teradataml.common.bulk_exposed_utils import \
40
+ _validate_unimplemented_function
41
+ from teradataml.common.constants import (AEDConstants, OutputStyle,
42
+ PTITableConstants, PythonTypes,
43
+ SourceType, SQLConstants,
44
+ SQLFunctionConstants,
45
+ TableOperatorConstants,
46
+ TeradataConstants, TeradataTypes)
39
47
  from teradataml.common.exceptions import TeradataMlException
40
- from teradataml.common.messages import Messages
41
48
  from teradataml.common.messagecodes import MessageCodes
42
- from teradataml.common.constants import AEDConstants
43
- from teradataml.common.constants import SourceType, PythonTypes, TeradataConstants, \
44
- TeradataTypes, PTITableConstants, TableOperatorConstants, SQLFunctionConstants
45
- from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils, DataFrameUtils
46
- from teradataml.dataframe.indexer import _LocationIndexer
47
- from teradataml.common.aed_utils import AedUtils
48
- from teradataml.options.display import display
49
- from teradataml.options.configure import configure
49
+ from teradataml.common.messages import Messages
50
+ from teradataml.common.sqlbundle import SQLBundle
51
+ from teradataml.common.utils import UtilFuncs
50
52
  from teradataml.dataframe.copy_to import copy_to_sql
53
+ from teradataml.dataframe.data_transfer import _DataTransferUtils
54
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils
55
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
56
+ from teradataml.dataframe.indexer import _LocationIndexer
51
57
  from teradataml.dataframe.row import _Row
52
58
  from teradataml.dataframe.setop import concat
59
+ from teradataml.dataframe.sql import _MetaExpression
60
+ from teradataml.dataframe.sql_functions import case
61
+ from teradataml.dataframe.sql_interfaces import ColumnExpression
62
+ from teradataml.dataframe.window import Window
53
63
  from teradataml.dbutils.dbutils import list_td_reserved_keywords
64
+ from teradataml.options.configure import configure
65
+ from teradataml.options.display import display
54
66
  from teradataml.plot.plot import _Plot
55
67
  from teradataml.scriptmgmt.UserEnv import UserEnv
56
- from teradataml.utils.dtypes import _Dtypes, _ListOf, _TupleOf
57
- from teradataml.utils.validators import _Validators
68
+ from teradataml.series.series import Series
58
69
  from teradataml.table_operators.table_operator_util import _TableOperatorUtils
59
- from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
60
- from teradatasql import OperationalError
61
- from teradataml.dataframe.window import Window
62
- from teradataml.dataframe.data_transfer import _DataTransferUtils
63
- from teradataml.common.bulk_exposed_utils import _validate_unimplemented_function
64
70
  from teradataml.telemetry_utils.queryband import collect_queryband
65
- from teradataml.options.configure import configure
66
- from teradataml.utils.internal_buffer import _InternalBuffer
67
- from teradataml.common.constants import OutputStyle
71
+ from teradataml.utils.dtypes import _Dtypes, _ListOf, _TupleOf
72
+ from teradataml.utils.validators import _Validators
73
+
74
+ # Adding imports at the end to avoid circular imports.
75
+ from teradataml.common.aed_utils import AedUtils
68
76
 
69
77
  # TODO use logger when available on master branch
70
78
  # logger = teradatapylog.getLogger()
71
79
 
80
+
72
81
  class in_schema:
73
82
  """
74
83
  Class takes a schema name, a table name and datalake name attributes
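Although the rest of the in_schema docstring falls outside this hunk, the three attributes it names line up with the from_table() change further down, where in_schema(schema_name, table_name, datalake_name) is constructed positionally. A minimal sketch of both forms (database, table, and datalake names are illustrative):

>>> from teradataml.dataframe.dataframe import DataFrame, in_schema
>>> df = DataFrame(in_schema("sales_db", "orders"))
>>> otf_df = DataFrame(in_schema("sales_db", "orders", "my_datalake"))  # datalake-backed (OTF) table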
@@ -149,26 +158,37 @@ class DataFrame():
149
158
  on tables, views, and queries on Teradata Vantage.
150
159
  """
151
160
 
152
- def __init__(self, table_name=None, index=True, index_label=None, query=None, materialize=False):
161
+ def __init__(self, data=None, index=True, index_label=None, query=None, materialize=False, **kwargs):
153
162
  """
154
163
  Constructor for teradataml DataFrame.
155
164
 
156
165
  PARAMETERS:
157
- table_name:
166
+ data:
158
167
  Optional Argument.
159
- The table name or view name in Teradata Vantage referenced by this DataFrame.
160
- Types: str
168
+ Specifies the input data to create a teradataml DataFrame.
169
+ Notes:
170
+ If a dictionary is provided, it must follow the below requirements:
171
+ * Keys must be strings (column names).
172
+ * Values must be lists of equal length (column data).
173
+ * Nested dictionaries are not supported.
174
+ Types: str OR pandas DataFrame OR in_schema OR numpy array OR list OR dictionary
161
175
 
162
176
  index:
163
177
  Optional Argument.
164
- True if using index column for sorting, otherwise False.
178
+ If "data" is a string, then the argument specifies whether to use the index column
179
+ for sorting or not.
180
+ If "data" is a pandas DataFrame, then this argument specifies whether to
181
+ save Pandas DataFrame index as a column or not.
165
182
  Default Value: True
166
183
  Types: bool
167
184
 
168
185
  index_label:
169
186
  Optional Argument.
170
- Column/s used for sorting.
171
- Types: str OR list of Strings (str)
187
+ If "data" is a string, then the argument specifies column(s) used for sorting.
188
+ If "data" is a pandas DataFrame, then the default behavior is applied.
189
+ Note:
190
+ * Refer to the "index_label" parameter of copy_to_sql() for details on the default behaviour.
191
+ Types: str OR list of str
172
192
 
173
193
  query:
174
194
  Optional Argument.
@@ -187,29 +207,127 @@ class DataFrame():
187
207
  Default Value: False (No materialization)
188
208
  Types: bool
189
209
 
210
+ kwargs:
211
+ table_name:
212
+ Optional Argument.
213
+ The table name or view name in Teradata Vantage referenced by this DataFrame.
214
+ Note:
215
+ * If "data" and "table_name" are both specified, then the "table_name" argument is ignored.
216
+ Types: str or in_schema
217
+
218
+ primary_index:
219
+ Optional Argument.
220
+ Specifies which column(s) to use as primary index for the teradataml DataFrame.
221
+ Note:
222
+ * This argument is only applicable when creating a DataFrame from a pandas DataFrame.
223
+ Types: str OR list of str
224
+
225
+ types:
226
+ Optional Argument.
227
+ Specifies required data types for requested columns to be saved in Teradata Vantage.
228
+ Notes:
229
+ * This argument is not applicable when "data" argument is of type str or in_schema.
230
+ * Refer to the "types" parameter of copy_to_sql() for more details.
231
+ Types: dict
232
+
233
+ columns:
234
+ Optional Argument.
235
+ Specifies the names of the columns to be used in the DataFrame.
236
+ Notes:
237
+ * This argument is not applicable when "data" argument is of type str or in_schema.
238
+ * If "data" is a dictionary and this argument is specified, only the specified columns will be
239
+ included in the DataFrame if the dictionary contains those keys. If the dictionary does not
240
+ contain the specified keys, those columns will be added with NaN values.
241
+ Types: str OR list of str
242
+
190
243
  EXAMPLES:
191
- from teradataml.dataframe.dataframe import DataFrame
244
+ >>> from teradataml.dataframe.dataframe import DataFrame
245
+ >>> import pandas as pd
192
246
 
193
- # Example 1: The following example creates a DataFrame from the 'table_name'
194
- # or 'view_name'.
195
- # Created DataFrame using table name.
196
- df = DataFrame("mytab")
247
+ # Example 1: Create a teradataml DataFrame from table name.
248
+ >>> df = DataFrame("mytab")
197
249
 
198
- # Created DataFrame using view name.
199
- df = DataFrame("myview")
250
+ # Example 2: Create a teradataml DataFrame from view name.
251
+ >>> df = DataFrame("myview")
200
252
 
201
- # Created DataFrame using view name without using index column for sorting.
202
- df = DataFrame("myview", False)
253
+ # Example 3: Create a teradataml DataFrame using view name without using index column for sorting.
254
+ >>> df = DataFrame("myview", False)
203
255
 
204
- # Created DataFrame using table name and sorted using Col1 and Col2
205
- df = DataFrame("mytab", True, "Col1, Col2")
256
+ # Example 4: Create a teradataml DataFrame using table name and consider columns Col1 and Col2
257
+ # while running DataFrame.head() or DataFrame.tail() methods.
258
+ >>> df = DataFrame("mytab", True, ["Col1", "Col2"])
206
259
 
260
+ # Example 5: Create a teradataml DataFrame from the existing Vantage table "dbcinfo"
261
+ # in the non-default database "dbc" using the in_schema() object.
262
+ >>> from teradataml.dataframe.dataframe import in_schema
263
+ >>> df = DataFrame(in_schema("dbc", "dbcinfo"))
207
264
 
208
- # Example 2: The following example creates a DataFrame from the existing Vantage
209
- # table "dbcinfo" in the non-default database "dbc" using the
210
- # in_schema() function.
211
- from teradataml.dataframe.dataframe import in_schema
212
- df = DataFrame(in_schema("dbc", "dbcinfo"))
265
+ # Example 6: Create a teradataml DataFrame from a pandas DataFrame.
266
+ >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
267
+ >>> df = DataFrame(pdf)
268
+ >>> df
269
+ col1 col2 index_label
270
+ 0 3 6 2
271
+ 1 2 5 1
272
+ 2 1 4 0
273
+
274
+ # Example 7: Create a teradataml DataFrame from a pandas DataFrame without index column.
275
+ >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
276
+ >>> df = DataFrame(data=pdf, index=False)
277
+ >>> df
278
+ col1 col2
279
+ 0 3 6
280
+ 1 2 5
281
+ 2 1 4
282
+
283
+ # Example 8: Create a teradataml DataFrame from a pandas DataFrame with
284
+ # index label and primary index as 'id'.
285
+ >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
286
+ >>> df = DataFrame(pdf, index=True, index_label='id', primary_index='id')
287
+ >>> df
288
+ col1 col2
289
+ id
290
+ 2 3 6
291
+ 1 2 5
292
+ 0 1 4
293
+
294
+ # Example 9: Create a teradataml DataFrame from list of lists.
295
+ >>> df = DataFrame([[1, 2], [3, 4]])
296
+ >>> df
297
+ col_0 col_1 index_label
298
+ 0 3 4 1
299
+ 1 1 2 0
300
+
301
+ # Example 10: Create a teradataml DataFrame from numpy array.
302
+ >>> import numpy as np
303
+ >>> df = DataFrame(np.array([[1, 2], [3, 4]]), index=True, index_label="id")
304
+ >>> df
305
+ col_0 col_1
306
+ id
307
+ 1 3 4
308
+ 0 1 2
309
+
310
+ # Example 11: Create a teradataml DataFrame from a dictionary.
311
+ >>> df = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=True, index_label="id")
312
+ >>> df
313
+ col1 col2
314
+ id
315
+ 1 2 4
316
+ 0 1 3
317
+
318
+ # Example 12: Create a teradataml DataFrame from list of dictionaries.
319
+ >>> df = DataFrame([{"col1": 1, "col2": 2}, {"col1": 3, "col2": 4}], index=False)
320
+ >>> df
321
+ col1 col2
322
+ 0 3 4
323
+ 1 1 2
324
+
325
+ # Example 13: Create a teradataml DataFrame from list of tuples.
326
+ >>> df = DataFrame([("Alice", 1), ("Bob", 2)])
327
+ >>> df
328
+ col_0 col_1 index_label
329
+ 0 Alice 1 1
330
+ 1 Bob 2 0
213
331
 
214
332
  RAISES:
215
333
  TeradataMlException - TDMLDF_CREATE_FAIL
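The examples above do not exercise the new keyword-only arguments together. A hedged sketch of combining them when loading client-side data, assuming "types" and "primary_index" are forwarded to copy_to_sql() as the parameter notes describe (table and column names are illustrative):

>>> import pandas as pd
>>> from teradatasqlalchemy.types import INTEGER, VARCHAR
>>> pdf = pd.DataFrame({"emp_id": [1, 2], "emp_name": ["Alice", "Bob"]})
>>> df = DataFrame(pdf, index=False, primary_index="emp_id",
...                types={"emp_id": INTEGER, "emp_name": VARCHAR(50)})
>>> # For a dictionary input, "columns" selects keys; missing keys become NaN columns.
>>> df2 = DataFrame({"col1": [1, 2]}, index=False, columns=["col1", "col2"])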
@@ -243,17 +361,35 @@ class DataFrame():
243
361
  # Property to determine if table is an ART table or not.
244
362
  self._is_art = None
245
363
 
364
+ # This attribute stores the previous assign arguments in continuous assign calls.
365
+ self._previous_assign_args = None
366
+ # This attribute stores the root DataFrame columns.
367
+ self._root_columns = None
368
+
246
369
  self._datalake = None
247
370
  self._database = None
248
371
  self._table = None
249
372
  self._otf = False
250
373
 
251
- if isinstance(table_name, in_schema):
252
- self._table = table_name.table_name
253
- self._datalake = table_name.datalake_name
254
- self._database = table_name.schema_name
374
+ table_name = kwargs.get("table_name", None)
375
+ primary_index = kwargs.get("primary_index", None)
376
+ columns = kwargs.get("columns", None)
377
+ types = kwargs.get("types", None)
378
+
379
+ # Check if the data is an instance of in_schema or if the data is None
380
+ # and table_name is an instance of in_schema, then assign the table_name,
381
+ # datalake_name and schema_name to the DataFrame object.
382
+ schema_obj = data if isinstance(data, in_schema) else (
383
+ table_name if data is None and isinstance(table_name, in_schema) else None)
384
+
385
+ if schema_obj:
386
+ self._table = schema_obj.table_name
387
+ self._datalake = schema_obj.datalake_name
388
+ self._database = schema_obj.schema_name
255
389
  self._otf = True if self._datalake else False
256
390
 
391
+ # Convert schema objects to strings.
392
+ data = str(data) if isinstance(data, in_schema) else data
257
393
  table_name = str(table_name) if isinstance(table_name, in_schema) else table_name
258
394
 
259
395
  # Below matrix is list of list, where in each row contains following elements:
@@ -272,18 +408,49 @@ class DataFrame():
272
408
  # 6. element6 --> A list of permitted values, an argument can accept.
273
409
  # If not specified, it is as good as passing None. If a list is passed, validation will be
274
410
  # performed for permitted values.
411
+
275
412
  awu_matrix = []
276
- awu_matrix.append(["table_name", table_name, True, (str), True])
413
+ dtypes = (list, tuple, dict)
414
+ awu_matrix.append(["data", data, True, (str, pd.DataFrame, np.ndarray, dict, _ListOf(dtypes)), True])
277
415
  awu_matrix.append(["index", index, True, (bool)])
278
416
  awu_matrix.append(["index_label", index_label, True, (str, list)])
279
417
  awu_matrix.append(["query", query, True, (str), True])
280
418
  awu_matrix.append(["materialize", materialize, True, (bool)])
419
+ awu_matrix.append(["table_name", table_name, True, (str), True])
420
+ awu_matrix.append(["primary_index", primary_index, True, (str, list)])
421
+ awu_matrix.append(["types", types, True, (dict)])
422
+ awu_matrix.append(["columns", columns, True, (str, list), True])
281
423
 
282
424
  # Validate argument types
283
425
  _Validators._validate_function_arguments(awu_matrix)
284
426
 
427
+ # Convert columns to list if it is a string.
428
+ if isinstance(columns, str):
429
+ columns = [columns]
430
+
285
431
  try:
286
- if table_name is not None:
432
+ if table_name is not None or data is not None:
433
+
434
+ # If data is list or numpy array or dictionary, then convert it to a pandas DataFrame.
435
+ if isinstance(data, (list, np.ndarray, dict)):
436
+ data = pd.DataFrame(data, columns=columns)
437
+ # If the data is a pandas DataFrame, then store the data in a temporary table in Vantage.
438
+ if isinstance(data, pd.DataFrame):
439
+ # Create a copy of the pandas DataFrame to avoid modifying the original,
440
+ # because column names will be changed if they are integers.
441
+ pd_data = data.copy()
442
+ # If the columns are not of type string, then convert them to string.
443
+ pd_data.columns = [f"col_{i}" if isinstance(i, int) else i for i in pd_data.columns]
444
+ # Set the table_name to the name of the table created in the database.
445
+ table_name = UtilFuncs._generate_temp_table_name(prefix="from_pandas",
446
+ table_type=TeradataConstants.TERADATA_TABLE)
447
+
448
+ copy_to_sql(pd_data, table_name, index=index, index_label=index_label, primary_index=primary_index,
449
+ types=types)
450
+ # If the data is a string, then set the table_name to the data.
451
+ elif isinstance(data, str):
452
+ table_name = data
453
+
287
454
  self._table_name = UtilFuncs._quote_table_names(table_name)
288
455
  self._source_type = SourceType.TABLE.value
289
456
  self._nodeid = self._aed_utils._aed_table(self._table_name)
@@ -337,6 +504,12 @@ class DataFrame():
337
504
  elif "[Error 3706] Syntax error" in str(oe):
338
505
  raise ValueError(Messages.get_message(
339
506
  MessageCodes.FROM_QUERY_SELECT_SUPPORTED).format("Check the syntax."))
507
+ elif "[Error 7825]" in str(oe):
508
+ # The UDF/XSP/UDM routine has thrown an SQLException
509
+ # with an SQL state in the range of 38001-38999 which
510
+ # is not a syntax error. Hence not a ValueError wrt query string.
511
+ # Expected when OTF snapshot related query is executed.
512
+ raise
340
513
  raise ValueError(Messages.get_message(
341
514
  MessageCodes.FROM_QUERY_SELECT_SUPPORTED))
342
515
 
@@ -498,7 +671,7 @@ class DataFrame():
498
671
  Types: str
499
672
 
500
673
  EXAMPLES:
501
- >>> from teradataml.dataframe.dataframe import DataFrame
674
+ >>> from teradataml import DataFrame
502
675
 
503
676
  # Example 1: The following example creates a DataFrame from a table or
504
677
  a view.
@@ -538,9 +711,9 @@ class DataFrame():
538
711
 
539
712
  """
540
713
  if schema_name:
541
- return cls(in_schema(schema_name, table_name, datalake_name))
542
-
543
- return cls(table_name, index, index_label)
714
+ return cls(table_name=in_schema(schema_name, table_name, datalake_name),
715
+ index=index, index_label=index_label)
716
+ return cls(table_name=table_name, index=index, index_label=index_label)
544
717
 
545
718
  @classmethod
546
719
  @collect_queryband(queryband="DF_fromQuery")
@@ -687,6 +860,300 @@ class DataFrame():
687
860
  df.__setattr__(arg, arg_value)
688
861
  return df
689
862
 
863
+ @classmethod
864
+ @collect_queryband(queryband="DF_fromPandas")
865
+ def from_pandas(cls, pandas_df, index=True, index_label=None, primary_index=None):
866
+ """
867
+ DESCRIPTION:
868
+ Creates a teradataml DataFrame from a pandas DataFrame.
869
+
870
+ PARAMETERS:
871
+ pandas_df:
872
+ Required Argument.
873
+ Specifies the pandas DataFrame to be converted to teradataml DataFrame.
874
+ Types: pandas DataFrame
875
+
876
+ index:
877
+ Optional Argument.
878
+ Specifies whether to save Pandas DataFrame index as a column or not.
879
+ Default Value: True
880
+ Types: bool
881
+
882
+ index_label:
883
+ Optional Argument.
884
+ Specifies the column label(s) for Pandas DataFrame index column(s).
885
+ Note:
886
+ * Refer to the "index_label" parameter of copy_to_sql() for more details.
887
+ Default Value: None
888
+ Types: str OR list of str
889
+
890
+ primary_index:
891
+ Optional Argument.
892
+ Specifies which column(s) to use as primary index for the teradataml DataFrame.
893
+ Types: str OR list of str
894
+
895
+ RETURNS:
896
+ teradataml DataFrame
897
+
898
+ RAISES:
899
+ TeradataMlException
900
+
901
+ EXAMPLES:
902
+ >>> import pandas as pd
903
+ >>> from teradataml import DataFrame
904
+ >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
905
+ >>> pdf1 = pd.DataFrame([[1, 2], [3, 4]])
906
+
907
+ # Example 1: Create a teradataml DataFrame from a pandas DataFrame.
908
+ >>> df = DataFrame.from_pandas(pdf)
909
+ >>> df
910
+ col1 col2 index_label
911
+ 0 3 6 2
912
+ 1 2 5 1
913
+ 2 1 4 0
914
+
915
+ # Example 2: Create a teradataml DataFrame from a pandas DataFrame
916
+ # and do not save the index as a column.
917
+ >>> df = DataFrame.from_pandas(pdf, index=False)
918
+ >>> df
919
+ col1 col2
920
+ 0 3 6
921
+ 1 2 5
922
+ 2 1 4
923
+
924
+ # Example 3: Create a teradataml DataFrame from a pandas DataFrame
925
+ # with index label as 'id' and set it as primary index.
926
+ >>> df = DataFrame.from_pandas(pdf, index=True, index_label='id', primary_index='id')
927
+ >>> df
928
+ col1 col2
929
+ id
930
+ 2 3 6
931
+ 1 2 5
932
+ 0 1 4
933
+
934
+ # Example 4: Create a teradataml DataFrame from a pandas DataFrame where
935
+ # columns are not explicitly defined in the pandas DataFrame.
936
+ >>> df = DataFrame.from_pandas(pdf1)
937
+ >>> df
938
+ col_0 col_1 index_label
939
+ 0 3 4 1
940
+ 1 1 2 0
941
+ """
942
+ # Validate 'pandas_df' argument, other arguments, will be validated as part of DataFrame().
943
+ arg_type_matrix = []
944
+ arg_type_matrix.append(["pandas_df", pandas_df, False, (pd.DataFrame,), True])
945
+
946
+ _Validators._validate_function_arguments(arg_type_matrix)
947
+
948
+ return cls(pandas_df, index, index_label, primary_index=primary_index)
949
+
950
+ @classmethod
951
+ @collect_queryband(queryband="DF_fromDict")
952
+ def from_dict(cls, data, columns=None):
953
+ """
954
+ DESCRIPTION:
955
+ Creates a DataFrame from a dictionary containing values as lists or numpy arrays.
956
+
957
+ PARAMETERS:
958
+ data:
959
+ Required Argument.
960
+ Specifies the Python dictionary to create a teradataml DataFrame.
961
+ Notes:
962
+ * Keys of the dictionary are used as column names.
963
+ * Values of the dictionary should be lists or numpy arrays.
964
+ * Nested dictionaries are not supported.
965
+ Types: dict
966
+
967
+ columns:
968
+ Optional Argument.
969
+ Specifies the column names for the DataFrame.
970
+ Types: str OR list of str
971
+
972
+ RETURNS:
973
+ teradataml DataFrame
974
+
975
+ RAISES:
976
+ TeradataMlException
977
+
978
+ EXAMPLES:
979
+ >>> from teradataml import DataFrame
980
+ >>> data_dict = {"name": ["Alice", "Bob", "Charlie"], "age": [25, 30, 28]}
981
+
982
+ # Example 1: Create a teradataml DataFrame from a dictionary where
983
+ # keys are column names and values are lists of column data.
984
+ >>> df = DataFrame.from_dict(data_dict)
985
+ >>> df
986
+ name age
987
+ 0 Charlie 28
988
+ 1 Bob 30
989
+ 2 Alice 25
990
+
991
+ # Example 2: Create a teradataml DataFrame from a dictionary where
992
+ # keys are column names and values are numpy arrays.
993
+ >>> import numpy as np
994
+ >>> data_dict = {"col1": np.array([1, 2, 3]), "col2": np.array([4, 5, 6])}
995
+ >>> df = DataFrame.from_dict(data_dict)
996
+ >>> df
997
+ col1 col2
998
+ 0 3 6
999
+ 1 2 5
1000
+ 2 1 4
1001
+ """
1002
+ arg_type_matrix = []
1003
+ arg_type_matrix.append(["data", data, False, (dict), True])
1004
+ arg_type_matrix.append(["columns", columns, True, (str, list), True])
1005
+
1006
+ _Validators._validate_function_arguments(arg_type_matrix)
1007
+
1008
+ return cls(data, columns=columns, index=False)
1009
+
1010
+ @classmethod
1011
+ @collect_queryband(queryband="DF_fromRecords")
1012
+ def from_records(cls, data, columns=None, **kwargs):
1013
+ """
1014
+ DESCRIPTION:
1015
+ Create a DataFrame from a list of lists/tuples/dictionaries/numpy arrays.
1016
+
1017
+ PARAMETERS:
1018
+ data:
1019
+ Required Argument.
1020
+ Specifies the iterator of data or the list of lists/tuples/dictionaries/numpy arrays to
1021
+ be converted to teradataml DataFrame.
1022
+ Note:
1023
+ * Nested lists or tuples or dictionaries are not supported.
1024
+ Types: Iterator, list
1025
+
1026
+ columns:
1027
+ Optional Argument.
1028
+ Specifies the column names for the DataFrame.
1029
+ Note:
1030
+ * If the data is a list of lists/tuples/numpy arrays and this argument
1031
+ is not specified, column names will be auto-generated as 'col_0', 'col_1', etc.
1032
+ Types: str OR list of str
1033
+
1034
+ kwargs:
1035
+ exclude:
1036
+ Optional Argument.
1037
+ Specifies the columns to be excluded from the DataFrame.
1038
+ Types: list OR tuple
1039
+
1040
+ coerce_float:
1041
+ Optional Argument.
1042
+ Specifies whether to convert values of non-string, non-numeric objects (like decimal.Decimal)
1043
+ to floating point, useful for SQL result sets.
1044
+ Default Value: True
1045
+ Types: bool
1046
+
1047
+ nrows:
1048
+ Optional Argument.
1049
+ Specifies the number of rows to be read from the data if the data is iterator.
1050
+ Types: int
1051
+
1052
+ RETURNS:
1053
+ teradataml DataFrame
1054
+
1055
+ RAISES:
1056
+ TeradataMlException
1057
+
1058
+ EXAMPLES:
1059
+ >>> from teradataml import DataFrame
1060
+
1061
+ # Example 1: Create a teradataml DataFrame from a list of lists.
1062
+ >>> df = DataFrame.from_records([['Alice', 1], ['Bob', 2]], columns=['name', 'age'])
1063
+ >>> df
1064
+ name age
1065
+ 0 Bob 2
1066
+ 1 Alice 1
1067
+
1068
+ # Example 2: Create a teradataml DataFrame from a list of tuples.
1069
+ >>> df = DataFrame.from_records([('Alice', 1), ('Bob', 3)], columns=['name', 'age'])
1070
+ >>> df
1071
+ name age
1072
+ 0 Bob 3
1073
+ 1 Alice 1
1074
+
1075
+ # Example 3: Create a teradataml DataFrame from a list of dictionaries.
1076
+ >>> df = DataFrame.from_records([{'name': 'Alice', 'age': 4}, {'name': 'Bob', 'age': 2}])
1077
+ >>> df
1078
+ name age
1079
+ 0 Bob 2
1080
+ 1 Alice 4
1081
+
1082
+ # Example 4: Create a teradataml DataFrame from a list where columns
1083
+ # are not explicitly defined.
1084
+ >>> df = DataFrame.from_records([['Alice', 1], ['Bob', 2]])
1085
+ >>> df
1086
+ col_0 col_1
1087
+ 0 Bob 2
1088
+ 1 Alice 1
1089
+
1090
+ # Example 5: Create a teradataml DataFrame from a list by excluding 'grade' column.
1091
+ >>> df = DataFrame.from_records([['Alice', 1, 'A'], ['Bob', 2, 'B']],
1092
+ ... columns=['name', 'age', 'grade'],
1093
+ ... exclude=['grade'])
1094
+ >>> df
1095
+ name age
1096
+ 0 Bob 2
1097
+ 1 Alice 1
1098
+
1099
+ # Example 6: Create a teradataml DataFrame from a list of lists
1100
+ # with "coerce_float" set to False.
1101
+ >>> df = DataFrame.from_records([[1, Decimal('2.5')], [3, Decimal('4.0')]],
1102
+ ... columns=['col1', 'col2'], coerce_float=False)
1103
+ >>> df
1104
+ col1 col2
1105
+ 0 3 4.0
1106
+ 1 1 2.5
1107
+ >>> df.tdtypes
1108
+ col1 BIGINT()
1109
+ col2 VARCHAR(length=1024, charset='UNICODE')
1110
+
1111
+ # Example 7: Create a teradataml DataFrame from a list of lists
1112
+ # with "coerce_float" set to True.
1113
+ >>> from decimal import Decimal
1114
+ >>> df = DataFrame.from_records([[1, Decimal('2.5')], [3, Decimal('4.0')]],
1115
+ ... columns=['col1', 'col2'], coerce_float=True)
1116
+ >>> df
1117
+ col1 col2
1118
+ 0 3 4.0
1119
+ 1 1 2.5
1120
+ >>> df.tdtypes
1121
+ col1 BIGINT()
1122
+ col2 FLOAT()
1123
+
1124
+ # Example 8: Create a teradataml DataFrame from an iterator with "nrows" set to 2.
1125
+ >>> def data_gen():
1126
+ ... yield ['Alice', 1]
1127
+ ... yield ['Bob', 2]
1128
+ ... yield ['Charlie', 3]
1129
+ >>> df = DataFrame.from_records(data_gen(), columns=['name', 'age'], nrows=2)
1130
+ >>> df
1131
+ name age
1132
+ 0 Bob 2
1133
+ 1 Alice 1
1134
+ """
1135
+
1136
+ exclude = kwargs.get("exclude", None)
1137
+ coerce_float = kwargs.get("coerce_float", True)
1138
+ nrows = kwargs.get("nrows", None)
1139
+
1140
+ arg_type_matrix = []
1141
+ dtypes = (list, tuple, dict)
1142
+ arg_type_matrix.append(["data", data, False, (Iterator, _ListOf(dtypes)), True])
1143
+ arg_type_matrix.append(["columns", columns, True, (str, _ListOf(str)), True])
1144
+ arg_type_matrix.append(["exclude", exclude, True, (_ListOf(str),), True])
1145
+ arg_type_matrix.append(["coerce_float", coerce_float, True, (bool, ), True])
1146
+ arg_type_matrix.append(["nrows", nrows, True, (int,), True])
1147
+
1148
+ _Validators._validate_function_arguments(arg_type_matrix)
1149
+
1150
+ if isinstance(columns, str):
1151
+ columns = [columns]
1152
+
1153
+ df = pd.DataFrame.from_records(data, columns=columns, exclude=exclude,
1154
+ coerce_float=coerce_float, nrows=nrows)
1155
+ return cls(df, index=False)
1156
+
690
1157
  def create_temp_view(self, name):
691
1158
  """
692
1159
  DESCRIPTION:
@@ -1144,9 +1611,19 @@ class DataFrame():
1144
1611
  datalake=self._datalake)
1145
1612
 
1146
1613
  # Extract column names and corresponding teradatasqlalchemy types.
1147
- col_names, col_types = df_utils._get_datalake_table_columns_info(self._database,
1148
- self._table,
1149
- self._datalake)
1614
+ try:
1615
+ # For latest OTF help table query results.
1616
+ col_names, col_types = df_utils._get_datalake_table_columns_info(self._database,
1617
+ self._table,
1618
+ self._datalake,
1619
+ use_dialect=True)
1620
+ except NoSuchColumnError:
1621
+ # For older OTF help table query result.
1622
+ col_names, col_types = df_utils._get_datalake_table_columns_info(self._database,
1623
+ self._table,
1624
+ self._datalake)
1625
+
1626
+ # Create a SQLAlchemy table object representing datalake table.
1150
1627
  t = sqlalchemy.Table(self._table, meta, schema=self._database,
1151
1628
  *(Column(col_name, col_type) for col_name, col_type in zip(col_names, col_types)))
1152
1629
  return _MetaExpression(t)
@@ -2924,9 +3401,8 @@ class DataFrame():
2924
3401
  msg = Messages.get_message(errcode)
2925
3402
  raise TeradataMlException(msg, errcode)
2926
3403
 
2927
- @argument_deprecation("20.0.0.5", "include", False, None)
2928
3404
  @collect_queryband(queryband="DF_describe")
2929
- def describe(self, percentiles=[.25, .5, .75], include=None, verbose=False, distinct=False, statistics=None,
3405
+ def describe(self, percentiles=[.25, .5, .75], verbose=False, distinct=False, statistics=None,
2930
3406
  columns=None, pivot=False):
2931
3407
  """
2932
3408
  DESCRIPTION:
@@ -2956,18 +3432,6 @@ class DataFrame():
2956
3432
  Default Values: [.25, .5, .75], which returns the 25th, 50th, and 75th percentiles.
2957
3433
  Types: float or List of floats
2958
3434
 
2959
- include:
2960
- Optional Argument.
2961
- Values can be either None or "all".
2962
- If the value is "all", both numeric and non-numeric columns are included.
2963
- Computes count, mean, std, min, percentiles, and max for numeric columns.
2964
- Computes count and unique for non-numeric columns.
2965
- If the value is None, only numeric columns are used for collecting statistics.
2966
- Note:
2967
- * Value 'all' is not applicable for 'Time Series Aggregate Mode'.
2968
- Default Values: None
2969
- Types: str
2970
-
2971
3435
  verbose:
2972
3436
  Optional Argument.
2973
3437
  Specifies a boolean value to be used for time series aggregation, stating whether to get
@@ -2994,7 +3458,6 @@ class DataFrame():
2994
3458
  Computes count and unique for non-numeric columns.
2995
3459
  Notes:
2996
3460
  1. statistics is not applicable for 'Time Series Aggregate Mode'.
2997
- 2. statistics should not be used with include as 'all'.
2998
3461
  Permitted Values: count, mean, min, max, unique, std, describe, percentile
2999
3462
  Default Values: None
3000
3463
  Types: str or List of str
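Since the deprecated "include" argument is removed in this release, a caller that previously passed include='all' can request the non-numeric statistics explicitly through "statistics", which the notes above document as computing count and unique for non-numeric columns. A hedged migration sketch:

>>> df = DataFrame("admissions_train")
>>> df.describe()                                  # numeric columns only, as before
>>> df.describe(statistics=["count", "unique"])    # also covers non-numeric columns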
@@ -3310,7 +3773,6 @@ class DataFrame():
3310
3773
  awu_matrix = []
3311
3774
  awu_matrix.append(["columns", columns, True, (str, list), True])
3312
3775
  awu_matrix.append(["percentiles", percentiles, True, (float, list)])
3313
- awu_matrix.append(["include", include, True, (str), True, [None, "all"]])
3314
3776
  awu_matrix.append(["verbose", verbose, True, (bool)])
3315
3777
  awu_matrix.append(["distinct", distinct, True, (bool)])
3316
3778
  awu_matrix.append(["statistics", statistics, True, (str, list), True,
@@ -3334,22 +3796,11 @@ class DataFrame():
3334
3796
  if statistics:
3335
3797
  statistics = [stats.lower() for stats in UtilFuncs._as_list(statistics)]
3336
3798
 
3337
- # Argument include and statistics should not be used together
3338
- if include is not None and statistics is not None:
3339
- raise ValueError(Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH).format(
3340
- 'include', 'statistics'
3341
- ))
3342
-
3343
3799
  # Percentiles must be a list of values between 0 and 1.
3344
3800
  if not isinstance(percentiles, list) or not all(p > 0 and p < 1 for p in percentiles):
3345
3801
  raise ValueError(Messages.get_message(MessageCodes.INVALID_ARG_VALUE, percentiles, "percentiles",
3346
3802
  "percentiles must be a list of values between 0 and 1"))
3347
3803
 
3348
- # Argument 'include' with value 'all' is not allowed for DataFrameGroupByTime
3349
- if include is not None and include.lower() == "all" and isinstance(self, DataFrameGroupByTime):
3350
- raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
3351
- 'include', 'Aggregation', 'all', 'describe()', 'DataFrame or DataFrameGroupBy'))
3352
-
3353
3804
  # Argument 'statistics' is not allowed for DataFrameGroupByTime
3354
3805
  if statistics is not None and isinstance(self, DataFrameGroupByTime):
3355
3806
  raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
@@ -3383,7 +3834,7 @@ class DataFrame():
3383
3834
  # Construct the aggregate query.
3384
3835
  agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
3385
3836
  percentiles=percentiles, function_label=function_label,
3386
- groupby_column_list=groupby_column_list, include=include,
3837
+ groupby_column_list=groupby_column_list, include=None,
3387
3838
  is_time_series_aggregate=True, verbose=verbose,
3388
3839
  distinct=distinct,
3389
3840
  timebucket_duration=self._timebucket_duration,
@@ -3414,7 +3865,7 @@ class DataFrame():
3414
3865
  # Construct the aggregate query.
3415
3866
  agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
3416
3867
  percentiles=percentiles, function_label=function_label,
3417
- groupby_column_list=groupby_column_list, include=include,
3868
+ groupby_column_list=groupby_column_list, include=None,
3418
3869
  is_time_series_aggregate=False, verbose=verbose,
3419
3870
  distinct=distinct, statistics=statistics)
3420
3871
 
@@ -5570,8 +6021,10 @@ class DataFrame():
5570
6021
  Specifies the function(s) to apply on DataFrame columns.
5571
6022
 
5572
6023
  Valid values for func are:
5573
- 'count', 'sum', 'min', 'max', 'mean', 'std', 'percentile', 'unique',
5574
- 'median', 'var'
6024
+ * 'count', 'sum', 'min', 'max', 'mean', 'std', 'percentile', 'percentile_<floatvalue>', 'unique',
6025
+ 'median', 'var'
6026
+ * Note: In 'percentile_<floatvalue>', <floatvalue> specifies the desired percentile value to
6027
+ calculate the aggregate. It must be in the range 0.0 to 1.0 (both inclusive).
5575
6028
 
5576
6029
  Acceptable formats for function(s) are
5577
6030
  string, dictionary, list of strings/functions/ColumnExpression or ColumnExpression.
@@ -5605,12 +6058,17 @@ class DataFrame():
5605
6058
  Output column names after the above operation are:
5606
6059
  min_employee_no, sum_employee_no, var_employee_no, min_first_name
5607
6060
 
5608
- 4. "func" passed as a ColumnExpression built using the aggregate functions.
6061
+ 4. "percentile_<floatvalue>" passed to agg.
6062
+ >>> df.agg({'employee_no' : ['percentile_0.25', 'percentile_0.75', 'min']})
6063
+ >>> df.agg(['percentile_0.25', 'percentile_0.75', 'sum'])
6064
+ >>> df.agg('percentile_0.25')
6065
+
6066
+ 5. "func" passed as a ColumnExpression built using the aggregate functions.
5609
6067
  >>> df.agg(df.first_name.count())
5610
6068
  Output column name after the above operation is:
5611
6069
  count(first_name)
5612
6070
 
5613
- 5. "func" passed as a list of ColumnExpression built using the aggregate functions.
6071
+ 6. "func" passed as a list of ColumnExpression built using the aggregate functions.
5614
6072
  >>> df.agg([df.employee_no.min(), df.first_name.count()])
5615
6073
  Output column names after the above operation are:
5616
6074
  min(employee_no), count(first_name)
@@ -5698,6 +6156,12 @@ class DataFrame():
5698
6156
  min_employee_no sum_employee_no var_employee_no min_first_name
5699
6157
  0 100 313 44.333333 abcd
5700
6158
 
6159
+ # Get the minimum, 25th percentile value and variance of employee number, by passing a dictionary of
6160
+ # column names to string function/list of string functions as parameter.
6161
+ >>> df.agg({'employee_no' : ['min', 'percentile_0.25', 'var']})
6162
+ min_employee_no percentile_0.25_employee_no var_employee_no
6163
+ 0 100 100 44.333333
6164
+
5701
6165
  # Get the minimum and sum of all the columns in the dataframe,
5702
6166
  # by passing list of string functions as parameter.
5703
6167
  >>> df.agg(['min', 'sum'])
@@ -5743,9 +6207,15 @@ class DataFrame():
5743
6207
  mean_employee_no unique_employee_no unique_first_name mean_joined_date unique_joined_date
5744
6208
  0 104.333333 3 2 60/12/04 2
5745
6209
 
6210
+ # Get the percentile of each column in the dataframe with default value 0.5.
5746
6211
  >>> df.agg('percentile')
5747
- percentile_employee_no percentile_marks
5748
- 0 101 None
6212
+ percentile_employee_no percentile_marks
6213
+ 0 101 None
6214
+
6215
+ # Get the 80th percentile of each column in the dataframe.
6216
+ >>> df.agg('percentile_0.8')
6217
+ percentile_0.8_employee_no percentile_0.8_marks
6218
+ 0 107 None
5749
6219
 
5750
6220
  # Using another table 'sales' (having repeated values) to demonstrate operations
5751
6221
  # 'unique' and 'percentile'.
@@ -5762,9 +6232,11 @@ class DataFrame():
5762
6232
  Blue Inc 90.0 50 95 101 2017-04-01
5763
6233
  Red Inc 200.0 150 140 None 2017-04-01
5764
6234
 
5765
- >>> df.agg('percentile')
5766
- percentile_Feb percentile_Jan percentile_Mar percentile_Apr
5767
- 0 200.0 150 140 215
6235
+ # Get the 80th and 40th percentile values of each column in the dataframe.
6236
+ >>> df1 = df.select(['Feb', 'Jan', 'Mar', 'Apr'])
6237
+ >>> df1.agg(['percentile_0.8', 'percentile_0.4'])
6238
+ percentile_0.8_Feb percentile_0.4_Feb percentile_0.8_Jan percentile_0.4_Jan percentile_0.8_Mar percentile_0.4_Mar percentile_0.8_Apr percentile_0.4_Apr
6239
+ 0 210.0 200.0 170 150 170 140 250 194
5768
6240
 
5769
6241
  >>> df.agg('unique')
5770
6242
  unique_accounts unique_Feb unique_Jan unique_Mar unique_Apr unique_datetime
@@ -5888,8 +6360,11 @@ class DataFrame():
5888
6360
  groupby_col_names.append(col)
5889
6361
  groupby_col_types.append(self[col].type)
5890
6362
 
5891
- if col in col_names:
5892
- # If group by column is not specified in the columns argument,
6363
+ include_grouping_columns = True if isinstance(self, DataFrameGroupBy) and \
6364
+ self._include_grouping_columns else False
6365
+ if not include_grouping_columns and col in col_names:
6366
+ # If 'include_grouping_columns' argument is set to True or
6367
+ # group by column is not specified in the columns argument,
5893
6368
  # then, we should ignore this processing, otherwise we
5894
6369
  # should process it in the same way to remove the reference
5895
6370
  # for grouping column from aggregation list.
@@ -5951,6 +6426,8 @@ class DataFrame():
5951
6426
 
5952
6427
  except TeradataMlException:
5953
6428
  raise
6429
+ except ValueError:
6430
+ raise
5954
6431
  except Exception as err:
5955
6432
  raise TeradataMlException(Messages.get_message(
5956
6433
  MessageCodes.EXECUTION_FAILED, "perform {} on DataFrame".format(operation), str(err)),
@@ -7760,7 +8237,7 @@ class DataFrame():
7760
8237
  """
7761
8238
  return (type(None), int, float, str, decimal.Decimal, ColumnExpression, ClauseElement)
7762
8239
 
7763
- def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
8240
+ def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, node_id, **kwargs):
7764
8241
  """
7765
8242
  DESCRIPTION:
7766
8243
  Function generates the MetaExpression and AED nodeid for DataFrame.assign()
@@ -7773,6 +8250,11 @@ class DataFrame():
7773
8250
  Default Value: False
7774
8251
  Types: bool
7775
8252
 
8253
+ node_id:
8254
+ Optional Argument.
8255
+ Specifies the input nodeid for the assign operation.
8256
+ Types: str
8257
+
7776
8258
  kwargs:
7777
8259
  keyword, value pairs
7778
8260
  - keywords are the column names.
@@ -7800,7 +8282,7 @@ class DataFrame():
7800
8282
 
7801
8283
  # Join the expressions in result.
7802
8284
  assign_expression = ', '.join(list(map(lambda x: x[1], result)))
7803
- new_nodeid = self._aed_utils._aed_assign(self._nodeid,
8285
+ new_nodeid = self._aed_utils._aed_assign(node_id,
7804
8286
  assign_expression,
7805
8287
  AEDConstants.AED_ASSIGN_DROP_EXISITING_COLUMNS.value)
7806
8288
 
@@ -7939,7 +8421,7 @@ class DataFrame():
7939
8421
  env_mapper[env_name] = [colname]
7940
8422
  else:
7941
8423
  env_mapper[env_name] = udf_expr.keys()
7942
-
8424
+ debug = False
7943
8425
  for env_name, cols in env_mapper.items():
7944
8426
  # Create a dictionary of output columns to column type.
7945
8427
  returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
@@ -7950,6 +8432,7 @@ class DataFrame():
7950
8432
  # Create a dictionary of output column name to udf arguments
7951
8433
  function_args = {}
7952
8434
  for colname, col in udf_expr.items():
8435
+ debug |= col._debug
7953
8436
  delimiter = col._delimiter
7954
8437
  quotechar = col._quotechar
7955
8438
  if colname in cols:
@@ -7982,7 +8465,9 @@ class DataFrame():
7982
8465
  columns_definitions=columns_definitions,
7983
8466
  output_type_converters={
7984
8467
  col_name: _Dtypes._teradata_type_to_python_type(col_type)
7985
- for col_name, col_type in returns.items()})
8468
+ for col_name, col_type in returns.items()},
8469
+ debug=debug
8470
+ )
7986
8471
 
7987
8472
  df = tbl_operators.execute()
7988
8473
  return df
@@ -8624,8 +9109,34 @@ class DataFrame():
8624
9109
  # from udf expression.
8625
9110
  if bool(regular_expr):
8626
9111
  try:
8627
- (new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(drop_columns, **regular_expr)
9112
+ root_node_id = None
9113
+ root_df_col = df.columns
9114
+
9115
+ # Get the previous node type. If it is assign and drop_columns is False,
9116
+ # then check whether the previous assign arguments exist and are not present
9117
+ # in either the root dataframe columns or the current assign arguments.
9118
+ # If these conditions are met, obtain the root node id (i.e., the first
9119
+ # node of the assign operation) and merge the previous assign arguments with the current ones.
9120
+
9121
+ prev_node_type = df._aed_utils._aed_get_node_query_type(df._nodeid)
9122
+ if not drop_columns and prev_node_type == "assign" and df._previous_assign_args is not None:
9123
+ if not df._root_columns & df._previous_assign_args.keys() and \
9124
+ not df._previous_assign_args.keys() & regular_expr.keys():
9125
+ # Get the root node id and root dataframe columns.
9126
+ root_df_col = df._root_columns
9127
+ root_node_id = df._aed_utils._aed_get_parent_nodeids(df._nodeid)[0]
9128
+ regular_expr = {**df._previous_assign_args, **regular_expr}
9129
+
9130
+ # If root_node_id is None, assign the current node id as root node of assign operation
9131
+ node_id = root_node_id if root_node_id is not None else df._nodeid
9132
+
9133
+ # Generate new meta expression and node id for the new dataframe.
9134
+ (new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(
9135
+ drop_columns, node_id = node_id, **regular_expr)
8628
9136
  df = df._create_dataframe_from_node(new_nodeid, new_meta, df._index_label)
9137
+ df._previous_assign_args = regular_expr
9138
+ df._root_columns = root_df_col
9139
+
8629
9140
  except Exception as err:
8630
9141
  errcode = MessageCodes.TDMLDF_INFO_ERROR
8631
9142
  msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
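As I read this new block, consecutive assign() calls that only add new, non-overlapping columns are now merged onto the root assign node instead of stacking one assign node per call. A hedged illustration of the pattern that benefits (the table and expressions are illustrative):

>>> df = DataFrame("admissions_train")
>>> df = df.assign(gpa_x2=df.gpa * 2)   # first assign; arguments are remembered in _previous_assign_args
>>> df = df.assign(gpa_x4=df.gpa * 4)   # second assign; merged with the previous arguments on the root node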
@@ -8962,6 +9473,15 @@ class DataFrame():
8962
9473
  Permitted Values: "CUBE", "ROLLUP", None
8963
9474
  Types: str or NoneType
8964
9475
 
9476
+ include_grouping_columns:
9477
+ Optional Argument.
9478
+ Specifies whether to include aggregations on the grouping column(s) or not.
9479
+ When set to True, the resultant DataFrame will have the aggregations on the
9480
+ columns mentioned in "columns_expr". Otherwise, the resultant DataFrame will not have
9481
+ aggregations on the columns mentioned in "columns_expr".
9482
+ Default Value: False
9483
+ Types: bool
9484
+
8965
9485
  NOTES:
8966
9486
  1. Users can still apply teradataml DataFrame methods (filters/sort/etc) on top of the result.
8967
9487
  2. Consecutive operations of grouping, i.e., groupby_time(), resample() and groupby() are not permitted.
@@ -8978,14 +9498,54 @@ class DataFrame():
8978
9498
  TeradataMlException
8979
9499
 
8980
9500
  EXAMPLES:
9501
+ # Load the data to run the example.
8981
9502
  >>> load_example_data("dataframe","admissions_train")
9503
+
9504
+ # Create a DataFrame on 'admissions_train' table.
8982
9505
  >>> df = DataFrame("admissions_train")
9506
+ >>> df
9507
+ masters gpa stats programming admitted
9508
+ id
9509
+ 15 yes 4.00 Advanced Advanced 1
9510
+ 34 yes 3.85 Advanced Beginner 0
9511
+ 13 no 4.00 Advanced Novice 1
9512
+ 38 yes 2.65 Advanced Beginner 1
9513
+ 5 no 3.44 Novice Novice 0
9514
+ 40 yes 3.95 Novice Beginner 0
9515
+ 7 yes 2.33 Novice Novice 1
9516
+ 22 yes 3.46 Novice Beginner 0
9517
+ 26 yes 3.57 Advanced Advanced 1
9518
+ 17 no 3.83 Advanced Advanced 1
9519
+
9520
+ # Example 1: Find the minimum value of all valid columns by
9521
+ # grouping the DataFrame with column 'masters'.
8983
9522
  >>> df1 = df.groupby(["masters"])
8984
9523
  >>> df1.min()
8985
9524
  masters min_id min_gpa min_stats min_programming min_admitted
8986
9525
  0 no 3 1.87 Advanced Advanced 0
8987
9526
  1 yes 1 1.98 Advanced Advanced 0
8988
9527
 
9528
+ # Example 2: Find the sum of all valid columns by grouping the DataFrame
9529
+ # with columns 'masters' and 'admitted'. Include grouping columns
9530
+ # in aggregate function 'sum'.
9531
+ >>> df1 = df.groupby(["masters", "admitted"], include_grouping_columns=True)
9532
+ >>> df1.sum()
9533
+ masters admitted sum_id sum_gpa sum_admitted
9534
+ 0 yes 1 188 34.35 10
9535
+ 1 yes 0 289 43.36 0
9536
+ 2 no 0 41 6.44 0
9537
+ 3 no 1 302 57.52 16
9538
+
9539
+ # Example 3: Find the sum of all valid columns by grouping the DataFrame with
9540
+ # columns 'masters' and 'admitted'. Do not include grouping columns
9541
+ # in aggregate function 'sum'.
9542
+ >>> df1 = df.groupby(["masters", "admitted"], include_grouping_columns=False)
9543
+ >>> df1.sum()
9544
+ masters admitted sum_id sum_gpa
9545
+ 0 yes 0 289 43.36
9546
+ 1 no 0 41 6.44
9547
+ 2 no 1 302 57.52
9548
+ 3 yes 1 188 34.35
8989
9549
  """
8990
9550
  # Argument validations
8991
9551
  arg_info_matrix = []
@@ -8993,6 +9553,8 @@ class DataFrame():
8993
9553
  option = kwargs.get("option", None)
8994
9554
  arg_info_matrix.append(["option", option, True, (str, type(None)), True,
8995
9555
  ["CUBE", "ROLLUP", None]])
9556
+ include_grouping_columns = kwargs.get("include_grouping_columns", False)
9557
+ arg_info_matrix.append(["include_grouping_columns", include_grouping_columns, True, (bool)])
8996
9558
 
8997
9559
  # Validate argument types
8998
9560
  _Validators._validate_function_arguments(arg_info_matrix)
@@ -9037,7 +9599,8 @@ class DataFrame():
9037
9599
 
9038
9600
  groupbyexpr = ', '.join(UtilFuncs._teradata_quote_arg(col, "\"", False) for col in column_list)
9039
9601
  groupbyObj = DataFrameGroupBy(self._nodeid, self._metaexpr, self._column_names_and_types, self.columns,
9040
- groupbyexpr, column_list, option)
9602
+ groupbyexpr, column_list, option, include_grouping_columns)
9603
+
9041
9604
  return groupbyObj
9042
9605
  except TeradataMlException:
9043
9606
  raise
@@ -11569,6 +12132,10 @@ class DataFrame():
11569
12132
  DESCRIPTION:
11570
12133
  Function to apply a user defined function to each row in the
11571
12134
  teradataml DataFrame, leveraging Vantage's Script Table Operator.
12135
+ Notes:
12136
+ 1. The function requires the same Python version to be used in both Vantage and the local environment.
12137
+ 2. Teradata recommends using the "dill" package with the same version in both Vantage and
12138
+ local environment.
11572
12139
 
11573
12140
  PARAMETERS:
11574
12141
  user_function:
@@ -11749,6 +12316,15 @@ class DataFrame():
11749
12316
  Default Value: True
11750
12317
  Types: bool
11751
12318
 
12319
+ debug:
12320
+ Optional Argument.
12321
+ Specifies whether to display the path of the script file generated during function execution. This
12322
+ argument helps in debugging failures during function execution. When set
12323
+ to True, the function displays the path of the script and does not remove the file from the local file system.
12324
+ Otherwise, the file is removed from the local file system.
12325
+ Default Value: False
12326
+ Types: bool
12327
+
11752
12328
  RETURNS:
11753
12329
  1. teradataml DataFrame if exec_mode is "IN-DB".
11754
12330
  2. Pandas DataFrame if exec_mode is "LOCAL".
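A hedged usage sketch of the new "debug" argument; the row function below is hypothetical and simply returns each row unchanged, the point being only that debug=True prints the generated script path and keeps the file:
>>> def passthrough(row):
...     return row
>>> out = df.map_row(passthrough, debug=True)  # script path is printed and the file is not removed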
@@ -11901,6 +12477,7 @@ class DataFrame():
11901
12477
  sort_ascending = kwargs.pop('sort_ascending', True)
11902
12478
  auth = kwargs.pop('auth', None)
11903
12479
  charset = kwargs.pop('charset', None)
12480
+ debug = kwargs.pop('debug', False)
11904
12481
 
11905
12482
  # Check for other extra/unknown arguments.
11906
12483
  unknown_args = list(kwargs.keys())
@@ -11919,7 +12496,7 @@ class DataFrame():
11919
12496
  sort_ascending=sort_ascending,
11920
12497
  returns=returns, delimiter=delimiter,
11921
12498
  quotechar=quotechar, auth=auth,
11922
- charset=charset, num_rows=num_rows)
12499
+ charset=charset, num_rows=num_rows, debug=debug)
11923
12500
 
11924
12501
  return tbl_op_util.execute()
11925
12502
 
@@ -11936,6 +12513,10 @@ class DataFrame():
11936
12513
  DESCRIPTION:
11937
12514
  Function to apply a user defined function to a group or partition of rows
11938
12515
  in the teradataml DataFrame, leveraging Vantage's Script Table Operator.
12516
+ Notes:
12517
+ 1. The function requires the same Python version in both Vantage and the local environment.
12518
+ 2. Teradata recommends using the "dill" package with the same version in both Vantage and
12519
+ the local environment.
11939
12520
 
11940
12521
  PARAMETERS:
11941
12522
  user_function:
@@ -12146,6 +12727,15 @@ class DataFrame():
12146
12727
  Default Value: True
12147
12728
  Types: bool
12148
12729
 
12730
+ debug:
12731
+ Optional Argument.
12732
+ Specifies whether to display the script file path generated during function execution or not. This
12733
+ argument helps in debugging failures during function execution. When set
12734
+ to True, the function displays the path of the script and does not remove the file from the local file system.
12735
+ Otherwise, the file is removed from the local file system.
12736
+ Default Value: False
12737
+ Types: bool
12738
+
12149
12739
  RETURNS:
12150
12740
  1. teradataml DataFrame if exec_mode is "IN-DB".
12151
12741
  2. Pandas DataFrame if exec_mode is "LOCAL".
@@ -12311,6 +12901,7 @@ class DataFrame():
12311
12901
  sort_ascending = kwargs.pop('sort_ascending', True)
12312
12902
  auth = kwargs.pop('auth', None)
12313
12903
  charset = kwargs.pop('charset', None)
12904
+ debug = kwargs.pop('debug', False)
12314
12905
 
12315
12906
  # Check for other extra/unknown arguments.
12316
12907
  unknown_args = list(kwargs.keys())
@@ -12329,7 +12920,7 @@ class DataFrame():
12329
12920
  sort_ascending=sort_ascending,
12330
12921
  returns=returns, delimiter=delimiter,
12331
12922
  quotechar=quotechar, auth=auth,
12332
- charset=charset, num_rows=num_rows)
12923
+ charset=charset, num_rows=num_rows, debug=debug)
12333
12924
 
12334
12925
  return tbl_op_util.execute()
12335
12926
 
@@ -12346,9 +12937,9 @@ class DataFrame():
12346
12937
  teradataml DataFrame, leveraging Apply Table Operator of Open
12347
12938
  Analytics Framework.
12348
12939
  Notes:
12349
- 1. The function requires dill package with same version in both remote environment
12350
- and local environment.
12351
- 2. Teradata recommends to use same Python version in both remote and local environment.
12940
+ 1. The function requires the same Python version in both the remote environment and the local environment.
12941
+ 2. Teradata recommends using the "dill" package with the same version in both the remote environment and
12942
+ the local environment.
12352
12943
 
12353
12944
  PARAMETERS:
12354
12945
  user_function:
@@ -12531,6 +13122,15 @@ class DataFrame():
12531
13122
  Default value: "csv"
12532
13123
  Types: str
12533
13124
 
13125
+ debug:
13126
+ Optional Argument.
13127
+ Specifies whether to display the script file path generated during function execution or not. This
13128
+ argument helps in debugging failures during function execution. When set
13129
+ to True, the function displays the path of the script and does not remove the file from the local file system.
13130
+ Otherwise, the file is removed from the local file system.
13131
+ Default Value: False
13132
+ Types: bool
13133
+
12534
13134
  RETURNS:
12535
13135
  teradataml DataFrame.
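The same "debug" switch applies here; a hedged sketch for apply(), where both the user function and the environment name "demo_env" are hypothetical:
>>> def passthrough(row):
...     return row
>>> out = df.apply(passthrough, env_name="demo_env", debug=True)  # keep the generated script for inspection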
12536
13136
 
@@ -12707,6 +13307,7 @@ class DataFrame():
12707
13307
  is_local_order = kwargs.pop('is_local_order', False)
12708
13308
  nulls_first = kwargs.pop('nulls_first', True)
12709
13309
  sort_ascending = kwargs.pop('sort_ascending', True)
13310
+ debug = kwargs.pop('debug', False)
12710
13311
 
12711
13312
  # Check for other extra/unknown arguments.
12712
13313
  unknown_args = list(kwargs.keys())
@@ -12729,7 +13330,8 @@ class DataFrame():
12729
13330
  charset=None,
12730
13331
  num_rows=num_rows,
12731
13332
  env_name=env_name,
12732
- style=style)
13333
+ style=style,
13334
+ debug=debug)
12733
13335
 
12734
13336
  return tbl_op_util.execute()
12735
13337
 
@@ -13075,7 +13677,7 @@ class DataFrame():
13075
13677
  False)
13076
13678
  column_names = list(dict.fromkeys(column_names))
13077
13679
 
13078
- if list_td_reserved_keywords(column_names) or UtilFuncs._is_ascii(column_names):
13680
+ if list_td_reserved_keywords(column_names) or UtilFuncs._is_non_ascii(column_names):
13079
13681
  column_names = UtilFuncs._teradata_quote_arg(column_names, "\"", False)
13080
13682
 
13081
13683
  col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, column_names)
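The corrected call above quotes a column name when it is a reserved keyword or contains non-ASCII characters. A stand-alone sketch of that condition (an illustration only; the real helpers live in UtilFuncs):
>>> def needs_quoting(name, reserved_keywords):
...     # Quote reserved keywords and any name containing non-ASCII characters.
...     return name.upper() in reserved_keywords or not name.isascii()
>>> needs_quoting("select", {"SELECT"}), needs_quoting("año", set())
(True, True)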
@@ -15261,7 +15863,7 @@ class DataFrame():
15261
15863
  return self.assign(**new_columns, drop_columns=True).select(self.columns)
15262
15864
 
15263
15865
  @collect_queryband(queryband="DF_cube")
15264
- def cube(self, columns):
15866
+ def cube(self, columns, include_grouping_columns=False):
15265
15867
  """
15266
15868
  DESCRIPTION:
15267
15869
  cube() function creates a multi-dimensional cube for the DataFrame
@@ -15275,6 +15877,15 @@ class DataFrame():
15275
15877
  Specifies the name(s) of input teradataml DataFrame column(s).
15276
15878
  Types: str OR list of str(s)
15277
15879
 
15880
+ include_grouping_columns:
15881
+ Optional Argument.
15882
+ Specifies whether to include aggregations on the grouping column(s) or not.
15883
+ When set to True, the resultant DataFrame will have the aggregations on the
15884
+ columns mentioned in "columns". Otherwise, the resultant DataFrame will not have
15885
+ aggregations on the columns mentioned in "columns".
15886
+ Default Value: False
15887
+ Types: bool
15888
+
15278
15889
  RETURNS:
15279
15890
  teradataml DataFrameGroupBy
15280
15891
 
@@ -15282,9 +15893,27 @@ class DataFrame():
15282
15893
  TeradataMlException
15283
15894
 
15284
15895
  EXAMPLES :
15285
- # Example 1: Analyzes the data by grouping into masters and stats dimensions.
15896
+ # Load the data to run the example.
15286
15897
  >>> load_example_data("dataframe","admissions_train")
15898
+
15899
+ # Create a DataFrame on 'admissions_train' table.
15287
15900
  >>> df = DataFrame("admissions_train")
15901
+ >>> df
15902
+ masters gpa stats programming admitted
15903
+ id
15904
+ 15 yes 4.00 Advanced Advanced 1
15905
+ 34 yes 3.85 Advanced Beginner 0
15906
+ 13 no 4.00 Advanced Novice 1
15907
+ 38 yes 2.65 Advanced Beginner 1
15908
+ 5 no 3.44 Novice Novice 0
15909
+ 40 yes 3.95 Novice Beginner 0
15910
+ 7 yes 2.33 Novice Novice 1
15911
+ 22 yes 3.46 Novice Beginner 0
15912
+ 26 yes 3.57 Advanced Advanced 1
15913
+ 17 no 3.83 Advanced Advanced 1
15914
+
15915
+ # Example 1: Find the sum of all valid columns by grouping the
15916
+ # DataFrame with columns 'masters' and 'stats'.
15288
15917
  >>> df1 = df.cube(["masters", "stats"]).sum()
15289
15918
  >>> df1
15290
15919
  masters stats sum_id sum_gpa sum_admitted
@@ -15299,10 +15928,42 @@ class DataFrame():
15299
15928
  8 no Advanced 189 34.95 9
15300
15929
  9 yes Novice 98 13.74 1
15301
15930
 
15931
+ # Example 2: Find the avg of all valid columns by grouping the DataFrame
15932
+ # with columns 'masters' and 'admitted'. Include grouping columns
15933
+ # in aggregate function 'avg'.
15934
+ >>> df1 = df.cube(["masters", "admitted"], include_grouping_columns=True).avg()
15935
+ >>> df1
15936
+ masters admitted avg_id avg_gpa avg_admitted
15937
+ 0 yes NaN 21.681818 3.532273 0.454545
15938
+ 1 None 1.0 18.846154 3.533462 1.000000
15939
+ 2 no NaN 19.055556 3.553333 0.888889
15940
+ 3 yes 0.0 24.083333 3.613333 0.000000
15941
+ 4 None NaN 20.500000 3.541750 0.650000
15942
+ 5 None 0.0 23.571429 3.557143 0.000000
15943
+ 6 yes 1.0 18.800000 3.435000 1.000000
15944
+ 7 no 1.0 18.875000 3.595000 1.000000
15945
+ 8 no 0.0 20.500000 3.220000 0.000000
15946
+
15947
+ # Example 3: Find the avg of all valid columns by grouping the DataFrame with
15948
+ # columns 'masters' and 'admitted'. Do not include grouping columns
15949
+ # in aggregate function 'avg'.
15950
+ >>> df1 = df.cube(["masters", "admitted"], include_grouping_columns=False).avg()
15951
+ >>> df1
15952
+ masters admitted avg_id avg_gpa
15953
+ 0 no 0.0 20.500000 3.220000
15954
+ 1 None 1.0 18.846154 3.533462
15955
+ 2 no NaN 19.055556 3.553333
15956
+ 3 yes 0.0 24.083333 3.613333
15957
+ 4 None NaN 20.500000 3.541750
15958
+ 5 None 0.0 23.571429 3.557143
15959
+ 6 yes 1.0 18.800000 3.435000
15960
+ 7 yes NaN 21.681818 3.532273
15961
+ 8 no 1.0 18.875000 3.595000
15302
15962
  """
15303
15963
  # Validate columns argument.
15304
15964
  arg_info_matrix = []
15305
15965
  arg_info_matrix.append(["columns", columns, False, (str, list), True])
15966
+ arg_info_matrix.append(["include_grouping_columns", include_grouping_columns, False, bool])
15306
15967
 
15307
15968
  # Validate argument types
15308
15969
  _Validators._validate_function_arguments(arg_info_matrix)
@@ -15312,10 +15973,10 @@ class DataFrame():
15312
15973
 
15313
15974
  # Query generation of cube API is same as the group by.
15314
15975
  # Only 'cube' is concatenated with 'group by' clause.
15315
- return self.groupby(columns, option="cube")
15976
+ return self.groupby(columns, option="cube", include_grouping_columns=include_grouping_columns)
15316
15977
 
15317
15978
  @collect_queryband(queryband="DF_rollup")
15318
- def rollup(self, columns):
15979
+ def rollup(self, columns, include_grouping_columns=False):
15319
15980
  """
15320
15981
  DESCRIPTION:
15321
15982
  rollup() function creates a multi-dimensional rollup for the DataFrame
@@ -15329,6 +15990,15 @@ class DataFrame():
15329
15990
  Specifies the name(s) of input teradataml DataFrame column(s).
15330
15991
  Types: str OR list of str(s)
15331
15992
 
15993
+ include_grouping_columns:
15994
+ Optional Argument.
15995
+ Specifies whether to include aggregations on the grouping column(s) or not.
15996
+ When set to True, the resultant DataFrame will have the aggregations on the
15997
+ columns mentioned in "columns". Otherwise, the resultant DataFrame will not have
15998
+ aggregations on the columns mentioned in "columns".
15999
+ Default Value: False
16000
+ Types: bool
16001
+
15332
16002
  RETURNS:
15333
16003
  teradataml DataFrameGroupBy
15334
16004
 
@@ -15336,9 +16006,27 @@ class DataFrame():
15336
16006
  TeradataMlException
15337
16007
 
15338
16008
  EXAMPLES :
15339
- # Example 1: Analyzes the data by grouping into masters and stats dimensions.
16009
+ # Load the data to run the example.
15340
16010
  >>> load_example_data("dataframe","admissions_train")
16011
+
16012
+ # Create a DataFrame on 'admissions_train' table.
15341
16013
  >>> df = DataFrame("admissions_train")
16014
+ >>> df
16015
+ masters gpa stats programming admitted
16016
+ id
16017
+ 15 yes 4.00 Advanced Advanced 1
16018
+ 34 yes 3.85 Advanced Beginner 0
16019
+ 13 no 4.00 Advanced Novice 1
16020
+ 38 yes 2.65 Advanced Beginner 1
16021
+ 5 no 3.44 Novice Novice 0
16022
+ 40 yes 3.95 Novice Beginner 0
16023
+ 7 yes 2.33 Novice Novice 1
16024
+ 22 yes 3.46 Novice Beginner 0
16025
+ 26 yes 3.57 Advanced Advanced 1
16026
+ 17 no 3.83 Advanced Advanced 1
16027
+
16028
+ # Example 1: Find the sum of all valid columns by grouping the
16029
+ # DataFrame with columns 'masters' and 'stats'.
15342
16030
  >>> df1 = df.rollup(["masters", "stats"]).sum()
15343
16031
  >>> df1
15344
16032
  masters stats sum_id sum_gpa sum_admitted
@@ -15351,11 +16039,39 @@ class DataFrame():
15351
16039
  6 yes Beginner 13 14.71 2
15352
16040
  7 yes Advanced 366 49.26 7
15353
16041
  8 no Advanced 189 34.95 9
15354
-
16042
+
16043
+ # Example 2: Find the avg of all valid columns by grouping the DataFrame
16044
+ # with columns 'masters' and 'admitted'. Include grouping columns
16045
+ # in aggregate function 'avg'.
16046
+ >>> df1 = df.rollup(["masters", "admitted"], include_grouping_columns=True).avg()
16047
+ >>> df1
16048
+ masters admitted avg_id avg_gpa avg_admitted
16049
+ 0 no NaN 19.055556 3.553333 0.888889
16050
+ 1 yes NaN 21.681818 3.532273 0.454545
16051
+ 2 None NaN 20.500000 3.541750 0.650000
16052
+ 3 yes 0.0 24.083333 3.613333 0.000000
16053
+ 4 no 1.0 18.875000 3.595000 1.000000
16054
+ 5 yes 1.0 18.800000 3.435000 1.000000
16055
+ 6 no 0.0 20.500000 3.220000 0.000000
16056
+
16057
+ # Example 3: Find the avg of all valid columns by grouping the DataFrame with
16058
+ # columns 'masters' and 'admitted'. Do not include grouping columns
16059
+ # in aggregate function 'avg'.
16060
+ >>> df1 = df.rollup(["masters", "admitted"], include_grouping_columns=False).avg()
16061
+ >>> df1
16062
+ masters admitted avg_id avg_gpa
16063
+ 0 no NaN 19.055556 3.553333
16064
+ 1 yes NaN 21.681818 3.532273
16065
+ 2 no 0.0 20.500000 3.220000
16066
+ 3 yes 0.0 24.083333 3.613333
16067
+ 4 no 1.0 18.875000 3.595000
16068
+ 5 yes 1.0 18.800000 3.435000
16069
+ 6 None NaN 20.500000 3.541750
15355
16070
  """
15356
16071
  # Validate columns argument.
15357
16072
  arg_info_matrix = []
15358
16073
  arg_info_matrix.append(["columns", columns, False, (str, list), True])
16074
+ arg_info_matrix.append(["include_grouping_columns", include_grouping_columns, False, bool])
15359
16075
 
15360
16076
  # Validate argument types
15361
16077
  _Validators._validate_function_arguments(arg_info_matrix)
@@ -15365,8 +16081,255 @@ class DataFrame():
15365
16081
 
15366
16082
  # Query generation of cube API is same as the group by.
15367
16083
  # Only 'rollup' is concatenated with 'group by' clause.
15368
- return self.groupby(columns, option="rollup")
16084
+ return self.groupby(columns, option="rollup", include_grouping_columns=include_grouping_columns)
16085
+
16086
+ # Metadata functions for DataFrame created on datalake/OTF table.
16087
+ @property
16088
+ @collect_queryband(queryband="DF_snpsht")
16089
+ @df_utils.check_otf_dataframe()
16090
+ def snapshots(self):
16091
+ """
16092
+ DESCRIPTION:
16093
+ Gets snapshot information for a DataLake table.
16094
+
16095
+ PARAMETERS:
16096
+ None
16097
+
16098
+ RETURNS:
16099
+ teradataml DataFrame.
15369
16100
 
16101
+ RAISES:
16102
+ TeradataMlException.
16103
+
16104
+ EXAMPLES :
16105
+ # Example 1: Get the snapshot information for datalake table.
16106
+ >>> from teradataml.dataframe.dataframe import in_schema
16107
+ >>> in_schema_tbl = in_schema(schema_name="datalake_db",
16108
+ ... table_name="datalake_table",
16109
+ ... datalake_name="datalake")
16110
+ >>> datalake_df = DataFrame(in_schema_tbl)
16111
+ >>> datalake_df.snapshots
16112
+ snapshotId snapshotTimestamp timestampMSecs manifestList summary
16113
+ 0 6373759902296319074 2023-06-15 00:07:47 1686787667420 s3://vim-iceberg-v1/glue/metadata/snap-6373759... {"added-data-files":"1","added-records":"5","a...}
16114
+ 1 4768076782814510171 2023-06-15 00:09:01 1686787741964 s3://vim-iceberg-v1/glue/metadata/snap-4768076... {"added-data-files":"1","added-records":"2","a...}
16115
+ 2 7771482207931850214 2024-05-29 04:59:09 1716958749946 s3://vim-iceberg-v1/glue/metadata/snap-7771482... {"deleted-data-files":"2","deleted-records":"7...}
16116
+ 3 1545363077953282623 2024-05-29 05:13:39 1716959619455 s3://vim-iceberg-v1/glue/metadata/snap-1545363... {"changed-partition-count":"0","total-records"...}
16117
+ 4 2166707884289108360 2024-05-29 05:17:49 1716959869075 s3://vim-iceberg-v1/glue/metadata/snap-2166707... {"changed-partition-count":"0","total-records"...}
16118
+ 5 8934190131471882700 2024-05-29 05:21:32 1716960092422 s3://vim-iceberg-v1/glue/metadata/snap-8934190... {"changed-partition-count":"0","total-records"...}
16119
+ 6 3086605171258231948 2024-05-29 05:34:43 1716960883786 s3://vim-iceberg-v1/glue/metadata/snap-3086605... {"changed-partition-count":"0","total-records"...}
16120
+ 7 7592503716012384122 2024-05-29 06:04:48 1716962688047 s3://vim-iceberg-v1/glue/metadata/snap-7592503... {"changed-partition-count":"0","total-records"...}
16121
+ 8 2831061717890032890 2024-06-04 17:21:01 1717521661689 s3://vim-iceberg-v1/glue/metadata/snap-2831061... {"added-data-files":"2","added-records":"7","a...}
16122
+ 9 8810491341502972715 2024-10-22 23:47:22 1729640842067 s3://vim-iceberg-v1/glue/metadata/snap-8810491... {"added-data-files":"1","added-records":"1","a...}
16123
+ 10 3953136136558551163 2024-12-03 04:40:48 1733200848733 s3://vim-iceberg-v1/glue/metadata/snap-3953136... {"added-data-files":"1","added-records":"4","a...}
16124
+ 11 6034775168901969481 2024-12-03 04:40:49 1733200849966 s3://vim-iceberg-v1/glue/metadata/snap-6034775... {"deleted-data-files":"1","deleted-records":"5...}
16125
+ """
16126
+ return self._execute_metadata_query_and_generate_dataframe("TD_SNAPSHOTS")
16127
+
16128
+ @property
16129
+ @collect_queryband(queryband="DF_prttns")
16130
+ @df_utils.check_otf_dataframe()
16131
+ def partitions(self):
16132
+ """
16133
+ DESCRIPTION:
16134
+ Gets partition information for a DataLake table.
16135
+
16136
+ PARAMETERS:
16137
+ None
16138
+
16139
+ RETURNS:
16140
+ teradataml DataFrame.
16141
+
16142
+ RAISES:
16143
+ TeradataMlException.
16144
+
16145
+ EXAMPLES :
16146
+ # Example 1: Get the partition information for datalake table.
16147
+ >>> from teradataml.dataframe.dataframe import in_schema
16148
+ >>> in_schema_tbl = in_schema(schema_name="datalake_db",
16149
+ ... table_name="datalake_table",
16150
+ ... datalake_name="datalake")
16151
+ >>> datalake_df = DataFrame(in_schema_tbl)
16152
+ >>> datalake_df.partitions
16153
+ id name
16154
+ 0 1000 c2
16155
+ 1 1001 c3
16156
+
16157
+
16158
+ """
16159
+ return self._execute_metadata_query_and_generate_dataframe("TD_PARTITIONS")
16160
+
16161
+ @property
16162
+ @collect_queryband(queryband="DF_mnfsts")
16163
+ @df_utils.check_otf_dataframe()
16164
+ def manifests(self):
16165
+ """
16166
+ DESCRIPTION:
16167
+ Gets manifest information for a DataLake table.
16168
+
16169
+ PARAMETERS:
16170
+ None
16171
+
16172
+ RETURNS:
16173
+ teradataml DataFrame.
16174
+
16175
+ RAISES:
16176
+ TeradataMlException.
16177
+
16178
+ EXAMPLES :
16179
+ # Example 1: Get the manifest information for datalake table.
16180
+ >>> from teradataml.dataframe.dataframe import in_schema
16181
+ >>> in_schema_tbl = in_schema(schema_name="datalake_db",
16182
+ ... table_name="datalake_table",
16183
+ ... datalake_name="datalake")
16184
+ >>> datalake_df = DataFrame(in_schema_tbl)
16185
+ >>> datalake_df.manifests
16186
+ snapshotId snapshotTimestamp manifestList manifestFile manifestFileLength datafilecount totalrowcount
16187
+ 0 8068130797628952520 2025-05-02 11:45:26 s3://vim-iceberg-v1/otftestdb/nt_sales/... s3://vim-iceberg-v1/otftestdb/nt_sales/... 7158 6 6
16188
+ """
16189
+ return self._execute_metadata_query_and_generate_dataframe("TD_MANIFESTS")
16190
+
16191
+ @property
16192
+ @collect_queryband(queryband="DF_hstry")
16193
+ @df_utils.check_otf_dataframe()
16194
+ def history(self):
16195
+ """
16196
+ DESCRIPTION:
16197
+ Gets the snapshot history related to a DataLake table.
16198
+
16199
+ PARAMETERS:
16200
+ None
16201
+
16202
+ RETURNS:
16203
+ teradataml DataFrame.
16204
+
16205
+ RAISES:
16206
+ TeradataMlException.
16207
+
16208
+ EXAMPLES :
16209
+ # Example 1: Get the snapshot history for datalake table.
16210
+ >>> from teradataml.dataframe.dataframe import in_schema
16211
+ >>> in_schema_tbl = in_schema(schema_name="datalake_db",
16212
+ ... table_name="datalake_table",
16213
+ ... datalake_name="datalake")
16214
+ >>> datalake_df = DataFrame(in_schema_tbl)
16215
+ >>> datalake_df.history
16216
+ id timestamp
16217
+ 0 8068130797628952520 2025-05-02 11:45:26
16218
+ """
16219
+ return self._execute_metadata_query_and_generate_dataframe("TD_HISTORY")
16220
+
16221
+ def _execute_metadata_query_and_generate_dataframe(self, func_name):
16222
+ """Function executes an OTF metadata query and returns the result as a teradataml DataFrame."""
16223
+ query = SQLBundle()._get_sql_query(SQLConstants.SQL_TD_OTF_METADATA).format(func_name,
16224
+ self._table_name)
16225
+ return DataFrame.from_query(query)
16226
+
16227
+ @collect_queryband(queryband="DF_gt_snpsht")
16228
+ @df_utils.check_otf_dataframe()
16229
+ def get_snapshot(self, as_of):
16230
+ """
16231
+ DESCRIPTION:
16232
+ Gets the data from a DataLake table for the given snapshot id or timestamp string.
16233
+ Notes:
16234
+ * The snapshot id can be obtained from the 'snapshots' property of the DataFrame.
16235
+ * The time travel value represented by 'as_of' should be in the format "YYYY-MM-DD HH:MM:SS.FFFFFFF"
16236
+ for TIMESTAMP string or "YYYY-MM-DD" for DATE string.
16237
+
16238
+ PARAMETERS:
16239
+ as_of:
16240
+ Required Argument.
16241
+ Specifies the snapshot id or timestamp information for which the snapshot is to be fetched.
16242
+ Types: str or int
16243
+
16244
+ RETURNS:
16245
+ teradataml DataFrame.
16246
+
16247
+ RAISES:
16248
+ TeradataMlException.
16249
+
16250
+ EXAMPLES:
16251
+ # DataFrame creation on OTF table.
16252
+ >>> from teradataml.dataframe.dataframe import in_schema
16253
+ >>> in_schema_tbl = in_schema(schema_name="datalake_db",
16254
+ ... table_name="datalake_table",
16255
+ ... datalake_name="datalake")
16256
+ >>> datalake_df = DataFrame(in_schema_tbl)
16257
+
16258
+ # List snapshots first.
16259
+ >>> datalake_df.snapshots
16260
+ snapshotId snapshotTimestamp timestampMSecs manifestList summary
16261
+ 2046682612111137809 2025-06-03 13:26:15 1748957175692 s3://vim-iceberg-v1/datalake_db/datalake_table/metadata/snap-204... {"added-data-files":"Red Inc","added-records"...}
16262
+ 282293708812257203 2025-06-03 05:53:19 1748929999245 s3://vim-iceberg-v1/datalake_db/datalake_table/metadata/snap-282... {"added-data-files":"Blue Inc","added-records"...}
16263
+
16264
+ # Example 1: Get the snapshot using snapshot id.
16265
+ >>> datalake_df.get_snapshot(2046682612111137809)
16266
+ Feb Jan Mar Apr datetime
16267
+ accounts
16268
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
16269
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
16270
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
16271
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
16272
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
16273
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
16274
+
16275
+ # Example 2: Get the snapshot using snapshot id in string format.
16276
+ >>> datalake_df.get_snapshot("2046682612111137809")
16277
+ Feb Jan Mar Apr datetime
16278
+ accounts
16279
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
16280
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
16281
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
16282
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
16283
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
16284
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
16285
+
16286
+ # Example 3: Get the snapshot using timestamp string.
16287
+ >>> datalake_df.get_snapshot("2025-06-03 13:26:16")
16288
+ Feb Jan Mar Apr datetime
16289
+ accounts
16290
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
16291
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
16292
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
16293
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
16294
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
16295
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
16296
+
16297
+ # Example 4: Get the snapshot using date string.
16298
+ >>> datalake_df.get_snapshot("2025-06-04")
16299
+ Feb Jan Mar Apr datetime
16300
+ accounts
16301
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
16302
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
16303
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
16304
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
16305
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
16306
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
16307
+
16308
+ """
16309
+ _Validators._validate_function_arguments([["as_of", as_of, False, (int, str)]])
16310
+
16311
+ # If 'as_of' is an int or a string of digits, treat it as a snapshot id and quote it.
16312
+ if isinstance(as_of, int) or (isinstance(as_of, str) and as_of.isdigit()):
16313
+ snapshot_on = "'{}'".format(as_of)
16314
+ else:
16315
+ try:
16316
+ snapshot_on = UtilFuncs._get_time_formatted_string(as_of)
16317
+ except ValueError as e:
16318
+ raise TeradataMlException(Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
16319
+ "get_snapshot", "Invalid value for 'as_of' argument: {}. "
16320
+ "Use valid format [\"YYYY-MM-DD HH:MM:SS.FFFFFFF\", \"YYYY-MM-DD HH:MM:SS\","
16321
+ "\"YYYY-MM-DD\"]".format(as_of)),
16322
+ MessageCodes.FUNC_EXECUTION_FAILED)
16323
+
16324
+ query = SQLBundle()._get_sql_query(SQLConstants.SQL_TD_OTF_SNAPSHOT).format(self._table_name, snapshot_on)
16325
+
16326
+ try:
16327
+ return DataFrame.from_query(query)
16328
+ except TeradataMlException as e:
16329
+ raise TeradataMlException(Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
16330
+ "get_snapshot()", "Invalid value for 'as_of' argument: {}. "
16331
+ "Use valid timestamp or correct snapshot id listed using 'snapshots' property.".format(as_of)),
16332
+ MessageCodes.FUNC_EXECUTION_FAILED)
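Taken together, the 'snapshots' property and get_snapshot() support a simple time-travel workflow; a hedged sketch, assuming 'datalake_df' from the examples above and a snapshot listing small enough to pull to the client:
>>> snaps = datalake_df.snapshots.to_pandas()
>>> oldest = snaps.sort_values("timestampMSecs").iloc[0]
>>> historical_df = datalake_df.get_snapshot(int(oldest["snapshotId"]))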
15370
16333
 
15371
16334
  class DataFrameGroupBy(DataFrame):
15372
16335
  """
@@ -15375,7 +16338,7 @@ class DataFrameGroupBy(DataFrame):
15375
16338
 
15376
16339
  """
15377
16340
 
15378
- def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupbyexpr, column_list, option=None):
16341
+ def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupbyexpr, column_list, option=None, include_grouping_columns=False):
15379
16342
  """
15380
16343
  init() method for DataFrameGroupBy.
15381
16344
 
@@ -15416,6 +16379,15 @@ class DataFrameGroupBy(DataFrame):
15416
16379
  Permitted Values: "CUBE", "ROLLUP", None
15417
16380
  Types: str or NoneType
15418
16381
 
16382
+ include_grouping_columns:
16383
+ Optional Argument.
16384
+ Specifies whether to include aggregations on the grouping column(s) or not.
16385
+ When set to True, the resultant DataFrame will have the aggregations on the
16386
+ columns mentioned in "columns". Otherwise, the resultant DataFrame will not have
16387
+ aggregations on the columns mentioned in "columns".
16388
+ Default Value: False
16389
+ Types: bool
16390
+
15419
16391
  RETURNS:
15420
16392
  teradataml DataFrameGroupBy instance
15421
16393
  """
@@ -15425,6 +16397,7 @@ class DataFrameGroupBy(DataFrame):
15425
16397
  self._column_names_and_types = column_names_and_types
15426
16398
  self._columns = columns
15427
16399
  self.groupby_column_list = column_list
16400
+ self._include_grouping_columns = include_grouping_columns
15428
16401
 
15429
16402
  def _get_assign_allowed_types(self):
15430
16403
  """
@@ -15446,7 +16419,7 @@ class DataFrameGroupBy(DataFrame):
15446
16419
  from sqlalchemy.sql.functions import Function
15447
16420
  return (type(None), int, float, str, decimal.Decimal, Function, ColumnExpression, ClauseElement)
15448
16421
 
15449
- def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
16422
+ def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, node_id, **kwargs):
15450
16423
  """
15451
16424
  DESCRIPTION:
15452
16425
  Function generates the MetaExpression and AED nodeid for DataFrameGroupBy.assign()
@@ -15459,6 +16432,11 @@ class DataFrameGroupBy(DataFrame):
15459
16432
  and grouping columns are returned. This is unused argument.
15460
16433
  Types: bool
15461
16434
 
16435
+ node_id:
16436
+ Optional Argument.
16437
+ Specifies the input nodeid for the assign operation. This is unused argument.
16438
+ Types: str
16439
+
15462
16440
  kwargs:
15463
16441
  keyword, value pairs
15464
16442
  - keywords are the column names.