teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +182 -13
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +8 -13
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +16 -1
- teradataml/analytics/utils.py +60 -1
- teradataml/automl/__init__.py +290 -106
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +29 -10
- teradataml/automl/data_transformation.py +11 -0
- teradataml/automl/feature_engineering.py +64 -4
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +1 -1
- teradataml/clients/auth_client.py +12 -8
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/common/constants.py +71 -26
- teradataml/common/exceptions.py +32 -0
- teradataml/common/messagecodes.py +28 -0
- teradataml/common/messages.py +13 -4
- teradataml/common/sqlbundle.py +3 -2
- teradataml/common/utils.py +345 -45
- teradataml/context/context.py +259 -93
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +1 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -1
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/url_data.csv +10 -9
- teradataml/dataframe/copy_to.py +38 -27
- teradataml/dataframe/data_transfer.py +61 -45
- teradataml/dataframe/dataframe.py +1110 -132
- teradataml/dataframe/dataframe_utils.py +73 -27
- teradataml/dataframe/functions.py +1070 -9
- teradataml/dataframe/sql.py +750 -959
- teradataml/dbutils/dbutils.py +33 -13
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/utils.py +4 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/_base.py +12 -157
- teradataml/options/configure.py +24 -9
- teradataml/scriptmgmt/UserEnv.py +317 -39
- teradataml/scriptmgmt/lls_utils.py +456 -135
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +897 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +406 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/store/__init__.py +1 -1
- teradataml/table_operators/Apply.py +16 -1
- teradataml/table_operators/Script.py +20 -1
- teradataml/table_operators/query_generator.py +4 -21
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/internal_buffer.py +22 -2
- teradataml/utils/utils.py +0 -1
- teradataml/utils/validators.py +318 -58
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
(In the reconstructed hunks below, deleted lines whose text was truncated in the diff view are shown as `…`.)

```diff
--- teradataml/dataframe/dataframe.py (20.0.0.4)
+++ teradataml/dataframe/dataframe.py (20.0.0.6)
@@ -12,63 +12,72 @@ This file implements the teradataml dataframe.
 A teradataml dataframe maps virtually to teradata tables and views.
 """
 import decimal
-import inspect
+import inspect
+import itertools
 import json
 import numbers
-import pandas as pd
 import re
-import sqlalchemy
 import sys
 import urllib.parse
+from collections import OrderedDict
+from collections.abc import Iterator
 
+import numpy as np
+import pandas as pd
+import sqlalchemy
 from sqlalchemy import Column
+from sqlalchemy.exc import NoSuchColumnError
+from sqlalchemy.sql import ClauseElement
+from teradatasql import OperationalError
+from teradatasqlalchemy.dialect import dialect as td_dialect
+from teradatasqlalchemy.dialect import preparer
+from teradatasqlalchemy.types import (BIGINT, BYTEINT, DECIMAL, FLOAT, INTEGER,
+                                      PERIOD_TIMESTAMP, SMALLINT, _TDType)
 
 import teradataml.context.context as tdmlctx
-
-from …
-
-from teradataml import …
-
-
-
-
-
-from teradatasqlalchemy.types import _TDType, BIGINT, INTEGER, PERIOD_TIMESTAMP, SMALLINT, BYTEINT, FLOAT, DECIMAL
-from teradataml.common.deprecations import argument_deprecation
-from teradataml.common.utils import UtilFuncs
+from teradataml import GarbageCollector, execute_sql
+from teradataml.common.bulk_exposed_utils import \
+    _validate_unimplemented_function
+from teradataml.common.constants import (AEDConstants, OutputStyle,
+                                         PTITableConstants, PythonTypes,
+                                         SourceType, SQLConstants,
+                                         SQLFunctionConstants,
+                                         TableOperatorConstants,
+                                         TeradataConstants, TeradataTypes)
 from teradataml.common.exceptions import TeradataMlException
-from teradataml.common.messages import Messages
 from teradataml.common.messagecodes import MessageCodes
-from teradataml.common.…
-from teradataml.common.…
-
-from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils, DataFrameUtils
-from teradataml.dataframe.indexer import _LocationIndexer
-from teradataml.common.aed_utils import AedUtils
-from teradataml.options.display import display
-from teradataml.options.configure import configure
+from teradataml.common.messages import Messages
+from teradataml.common.sqlbundle import SQLBundle
+from teradataml.common.utils import UtilFuncs
 from teradataml.dataframe.copy_to import copy_to_sql
+from teradataml.dataframe.data_transfer import _DataTransferUtils
+from teradataml.dataframe.dataframe_utils import DataFrameUtils
+from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
+from teradataml.dataframe.indexer import _LocationIndexer
 from teradataml.dataframe.row import _Row
 from teradataml.dataframe.setop import concat
+from teradataml.dataframe.sql import _MetaExpression
+from teradataml.dataframe.sql_functions import case
+from teradataml.dataframe.sql_interfaces import ColumnExpression
+from teradataml.dataframe.window import Window
 from teradataml.dbutils.dbutils import list_td_reserved_keywords
+from teradataml.options.configure import configure
+from teradataml.options.display import display
 from teradataml.plot.plot import _Plot
 from teradataml.scriptmgmt.UserEnv import UserEnv
-from teradataml.…
-from teradataml.utils.validators import _Validators
+from teradataml.series.series import Series
 from teradataml.table_operators.table_operator_util import _TableOperatorUtils
-from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
-from teradatasql import OperationalError
-from teradataml.dataframe.window import Window
-from teradataml.dataframe.data_transfer import _DataTransferUtils
-from teradataml.common.bulk_exposed_utils import _validate_unimplemented_function
 from teradataml.telemetry_utils.queryband import collect_queryband
-from teradataml.…
-from teradataml.utils.…
-
+from teradataml.utils.dtypes import _Dtypes, _ListOf, _TupleOf
+from teradataml.utils.validators import _Validators
+
+# Adding imports at the end to avoid circular imports.
+from teradataml.common.aed_utils import AedUtils
 
 # TODO use logger when available on master branch
 # logger = teradatapylog.getLogger()
 
+
 class in_schema:
     """
     Class takes a schema name, a table name and datalake name attributes
```
```diff
@@ -149,26 +158,37 @@ class DataFrame():
     on tables, views, and queries on Teradata Vantage.
     """
 
-    def __init__(self, …
+    def __init__(self, data=None, index=True, index_label=None, query=None, materialize=False, **kwargs):
         """
         Constructor for teradataml DataFrame.
 
         PARAMETERS:
-            …
+            data:
                 Optional Argument.
-                …
-                …
+                Specifies the input data to create a teradataml DataFrame.
+                Notes:
+                    If a dictionary is provided, it must follow the below requirements:
+                        * Keys must be strings (column names).
+                        * Values must be lists of equal length (column data).
+                        * Nested dictionaries are not supported.
+                Types: str OR pandas DataFrame OR in_schema OR numpy array OR list OR dictionary
 
             index:
                 Optional Argument.
-                …
+                If "data" is a string, then the argument specifies whether to use the index column
+                for sorting or not.
+                If "data" is a pandas DataFrame, then this argument specifies whether to
+                save Pandas DataFrame index as a column or not.
                 Default Value: True
                 Types: bool
 
             index_label:
                 Optional Argument.
-                …
-                …
+                If "data" is a string, then the argument specifies column(s) used for sorting.
+                If "data" is a pandas DataFrame, then the default behavior is applied.
+                Note:
+                    * Refer to the "index_label" parameter of copy_to_sql() for details on the default behaviour.
+                Types: str OR list of str
 
             query:
                 Optional Argument.
@@ -187,29 +207,127 @@ class DataFrame():
                 Default Value: False (No materialization)
                 Types: bool
 
+            kwargs:
+                table_name:
+                    Optional Argument.
+                    The table name or view name in Teradata Vantage referenced by this DataFrame.
+                    Note:
+                        * If "data" and "table_name" are both specified, then the "table_name" argument is ignored.
+                    Types: str or in_schema
+
+                primary_index:
+                    Optional Argument.
+                    Specifies which column(s) to use as primary index for the teradataml DataFrame.
+                    Note:
+                        * This argument is only applicable when creating a DataFrame from a pandas DataFrame.
+                    Types: str OR list of str
+
+                types:
+                    Optional Argument.
+                    Specifies required data types for requested columns to be saved in Teradata Vantage.
+                    Notes:
+                        * This argument is not applicable when "data" argument is of type str or in_schema.
+                        * Refer to the "types" parameter of copy_to_sql() for more details.
+                    Types: dict
+
+                columns:
+                    Optional Argument.
+                    Specifies the names of the columns to be used in the DataFrame.
+                    Notes:
+                        * This argument is not applicable when "data" argument is of type str or in_schema.
+                        * If "data" is a dictionary and this argument is specified, only the specified columns will be
+                          included in the DataFrame if the dictionary contains those keys. If the dictionary does not
+                          contain the specified keys, those columns will be added with NaN values.
+                    Types: str OR list of str
+
         EXAMPLES:
-            from teradataml.dataframe.dataframe import DataFrame
+            >>> from teradataml.dataframe.dataframe import DataFrame
+            >>> import pandas as pd
 
-            # Example 1:
-
-            # Created DataFrame using table name.
-            df = DataFrame("mytab")
+            # Example 1: Create a teradataml DataFrame from table name.
+            >>> df = DataFrame("mytab")
 
-            #
-            df = DataFrame("myview")
+            # Example 2: Create a teradataml DataFrame from view name.
+            >>> df = DataFrame("myview")
 
-            #
-            df = DataFrame("myview", False)
+            # Example 3: Create a teradataml DataFrame using view name without using index column for sorting.
+            >>> df = DataFrame("myview", False)
 
-            #
-            …
+            # Example 4: Create a teradataml DataFrame using table name and consider columns Col1 and Col2
+            #            while running DataFrame.head() or DataFrame.tail() methods.
+            >>> df = DataFrame("mytab", True, ["Col1", "Col2"])
 
+            # Example 5: Create a teradataml DataFrame from the existing Vantage table "dbcinfo"
+            #            in the non-default database "dbc" using the in_schema() object.
+            >>> from teradataml.dataframe.dataframe import in_schema
+            >>> df = DataFrame(in_schema("dbc", "dbcinfo"))
 
-            # Example …
-
-
-
-
+            # Example 6: Create a teradataml DataFrame from a pandas DataFrame.
+            >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
+            >>> df = DataFrame(pdf)
+            >>> df
+               col1  col2  index_label
+            0     3     6            2
+            1     2     5            1
+            2     1     4            0
+
+            # Example 7: Create a teradataml DataFrame from a pandas DataFrame without index column.
+            >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
+            >>> df = DataFrame(data=pdf, index=False)
+            >>> df
+               col1  col2
+            0     3     6
+            1     2     5
+            2     1     4
+
+            # Example 8: Create a teradataml DataFrame from a pandas DataFrame with
+            #            index label and primary index as 'id'.
+            >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
+            >>> df = DataFrame(pdf, index=True, index_label='id', primary_index='id')
+            >>> df
+                col1  col2
+            id
+            2      3     6
+            1      2     5
+            0      1     4
+
+            # Example 9: Create a teradataml DataFrame from list of lists.
+            >>> df = DataFrame([[1, 2], [3, 4]])
+            >>> df
+               col_0  col_1  index_label
+            0      3      4            1
+            1      1      2            0
+
+            # Example 10: Create a teradataml DataFrame from numpy array.
+            >>> import numpy as np
+            >>> df = DataFrame(np.array([[1, 2], [3, 4]]), index=True, index_label="id")
+            >>> df
+                col_0  col_1
+            id
+            1       3      4
+            0       1      2
+
+            # Example 11: Create a teradataml DataFrame from a dictionary.
+            >>> df = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=True, index_label="id")
+            >>> df
+                col1  col2
+            id
+            1      2     4
+            0      1     3
+
+            # Example 12: Create a teradataml DataFrame from list of dictionaries.
+            >>> df = DataFrame([{"col1": 1, "col2": 2}, {"col1": 3, "col2": 4}], index=False)
+            >>> df
+               col1  col2
+            0     3     4
+            1     1     2
+
+            # Example 13: Create a teradataml DataFrame from list of tuples.
+            >>> df = DataFrame([("Alice", 1), ("Bob", 2)])
+            >>> df
+               col_0  col_1  index_label
+            0  Alice      1            1
+            1    Bob      2            0
 
         RAISES:
             TeradataMlException - TDMLDF_CREATE_FAIL
@@ -243,17 +361,35 @@ class DataFrame():
         # Property to determine if table is an ART table or not.
         self._is_art = None
 
+        # This attribute stores the previous assign arguments in continuous assign calls.
+        self._previous_assign_args = None
+        # This attribute stores the root DataFrame columns.
+        self._root_columns = None
+
         self._datalake = None
         self._database = None
         self._table = None
         self._otf = False
 
-        …
-        …
-        …
-        …
+        table_name = kwargs.get("table_name", None)
+        primary_index = kwargs.get("primary_index", None)
+        columns = kwargs.get("columns", None)
+        types = kwargs.get("types", None)
+
+        # Check if the data is an instance of in_schema or if the data is None
+        # and table_name is an instance of in_schema, then assign the table_name,
+        # datalake_name and schema_name to the DataFrame object.
+        schema_obj = data if isinstance(data, in_schema) else (
+            table_name if data is None and isinstance(table_name, in_schema) else None)
+
+        if schema_obj:
+            self._table = schema_obj.table_name
+            self._datalake = schema_obj.datalake_name
+            self._database = schema_obj.schema_name
             self._otf = True if self._datalake else False
 
+        # Convert schema objects to strings.
+        data = str(data) if isinstance(data, in_schema) else data
         table_name = str(table_name) if isinstance(table_name, in_schema) else table_name
 
         # Below matrix is list of list, where in each row contains following elements:
@@ -272,18 +408,49 @@ class DataFrame():
         # 6. element6 --> A list of permitted values, an argument can accept.
         # If not specified, it is as good as passing None. If a list is passed, validation will be
         # performed for permitted values.
+
         awu_matrix = []
-        …
+        dtypes = (list, tuple, dict)
+        awu_matrix.append(["data", data, True, (str, pd.DataFrame, np.ndarray, dict, _ListOf(dtypes)), True])
         awu_matrix.append(["index", index, True, (bool)])
         awu_matrix.append(["index_label", index_label, True, (str, list)])
         awu_matrix.append(["query", query, True, (str), True])
         awu_matrix.append(["materialize", materialize, True, (bool)])
+        awu_matrix.append(["table_name", table_name, True, (str), True])
+        awu_matrix.append(["primary_index", primary_index, True, (str, list)])
+        awu_matrix.append(["types", types, True, (dict)])
+        awu_matrix.append(["columns", columns, True, (str, list), True])
 
         # Validate argument types
         _Validators._validate_function_arguments(awu_matrix)
 
+        # Convert columns to list if it is a string.
+        if isinstance(columns, str):
+            columns = [columns]
+
         try:
-            if table_name is not None:
+            if table_name is not None or data is not None:
+
+                # If data is list or numpy array or dictionary, then convert it to a pandas DataFrame.
+                if isinstance(data, (list, np.ndarray, dict)):
+                    data = pd.DataFrame(data, columns=columns)
+                # If the data is a pandas DataFrame, then store the data in a temporary table in Vantage.
+                if isinstance(data, pd.DataFrame):
+                    # Create a copy of the pandas DataFrame to avoid modifying the original,
+                    # because column names will be changed if they are integers.
+                    pd_data = data.copy()
+                    # If the columns are not of type string, then convert them to string.
+                    pd_data.columns = [f"col_{i}" if isinstance(i, int) else i for i in pd_data.columns]
+                    # Set the table_name to the name of the table created in the database.
+                    table_name = UtilFuncs._generate_temp_table_name(prefix="from_pandas",
+                                                                     table_type=TeradataConstants.TERADATA_TABLE)
+
+                    copy_to_sql(pd_data, table_name, index=index, index_label=index_label, primary_index=primary_index,
+                                types=types)
+                # If the data is a string, then set the table_name to the data.
+                elif isinstance(data, str):
+                    table_name = data
+
                 self._table_name = UtilFuncs._quote_table_names(table_name)
                 self._source_type = SourceType.TABLE.value
                 self._nodeid = self._aed_utils._aed_table(self._table_name)
```
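The constructor hunks above show 20.0.0.6 accepting in-memory data directly: list, dictionary, and numpy input is converted to a pandas DataFrame and materialized into a temporary table via copy_to_sql(). A minimal sketch of the equivalent entry points, assuming a Vantage context has already been created with create_context() (connection details and column names are placeholders, not from the diff):

```python
import pandas as pd
from teradataml import DataFrame

# Both calls materialize the same kind of temporary table under the hood:
# dict -> pd.DataFrame -> copy_to_sql -> DataFrame over the temp table.
df_from_dict = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=False)
df_from_pdf = DataFrame(pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}), index=False)
```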
```diff
@@ -337,6 +504,12 @@ class DataFrame():
             elif "[Error 3706] Syntax error" in str(oe):
                 raise ValueError(Messages.get_message(
                     MessageCodes.FROM_QUERY_SELECT_SUPPORTED).format("Check the syntax."))
+            elif "[Error 7825]" in str(oe):
+                # The UDF/XSP/UDM routine has thrown an SQLException
+                # with an SQL state in the range of 38001-38999 which
+                # is not a syntax error. Hence not a ValueError wrt query string.
+                # Expected when OTF snapshot related query is executed.
+                raise
             raise ValueError(Messages.get_message(
                 MessageCodes.FROM_QUERY_SELECT_SUPPORTED))
@@ -498,7 +671,7 @@ class DataFrame():
                 Types: str
 
         EXAMPLES:
-            >>> from teradataml…
+            >>> from teradataml import DataFrame
 
             # Example 1: The following example creates a DataFrame from a table or
             a view.
@@ -538,9 +711,9 @@ class DataFrame():
 
         """
         if schema_name:
-            return cls(in_schema(schema_name, table_name, datalake_name)…
-            …
-        return cls(table_name, index, index_label)
+            return cls(table_name=in_schema(schema_name, table_name, datalake_name),
+                       index=index, index_label=index_label)
+        return cls(table_name=table_name, index=index, index_label=index_label)
 
     @classmethod
     @collect_queryband(queryband="DF_fromQuery")
```
```diff
@@ -687,6 +860,300 @@ class DataFrame():
             df.__setattr__(arg, arg_value)
         return df
 
+    @classmethod
+    @collect_queryband(queryband="DF_fromPandas")
+    def from_pandas(cls, pandas_df, index=True, index_label=None, primary_index=None):
+        """
+        DESCRIPTION:
+            Creates a teradataml DataFrame from a pandas DataFrame.
+
+        PARAMETERS:
+            pandas_df:
+                Required Argument.
+                Specifies the pandas DataFrame to be converted to teradataml DataFrame.
+                Types: pandas DataFrame
+
+            index:
+                Optional Argument.
+                Specifies whether to save Pandas DataFrame index as a column or not.
+                Default Value: True
+                Types: bool
+
+            index_label:
+                Optional Argument.
+                Specifies the column label(s) for Pandas DataFrame index column(s).
+                Note:
+                    * Refer to the "index_label" parameter of copy_to_sql() for more details.
+                Default Value: None
+                Types: str OR list of str
+
+            primary_index:
+                Optional Argument.
+                Specifies which column(s) to use as primary index for the teradataml DataFrame.
+                Types: str OR list of str
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> import pandas as pd
+            >>> from teradataml import DataFrame
+            >>> pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
+            >>> pdf1 = pd.DataFrame([[1, 2], [3, 4]])
+
+            # Example 1: Create a teradataml DataFrame from a pandas DataFrame.
+            >>> df = DataFrame.from_pandas(pdf)
+            >>> df
+               col1  col2  index_label
+            0     3     6            2
+            1     2     5            1
+            2     1     4            0
+
+            # Example 2: Create a teradataml DataFrame from a pandas DataFrame
+            #            and do not save the index as a column.
+            >>> df = DataFrame.from_pandas(pdf, index=False)
+            >>> df
+               col1  col2
+            0     3     6
+            1     2     5
+            2     1     4
+
+            # Example 3: Create a teradataml DataFrame from a pandas DataFrame
+            #            with index label as 'id' and set it as primary index.
+            >>> df = DataFrame.from_pandas(pdf, index=True, index_label='id', primary_index='id')
+            >>> df
+                col1  col2
+            id
+            2      3     6
+            1      2     5
+            0      1     4
+
+            # Example 4: Create a teradataml DataFrame from a pandas DataFrame where
+            #            columns are not explicitly defined in the pandas DataFrame.
+            >>> df = DataFrame.from_pandas(pdf1)
+            >>> df
+               col_0  col_1  index_label
+            0      3      4            1
+            1      1      2            0
+        """
+        # Validate 'pandas_df' argument, other arguments, will be validated as part of DataFrame().
+        arg_type_matrix = []
+        arg_type_matrix.append(["pandas_df", pandas_df, False, (pd.DataFrame,), True])
+
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        return cls(pandas_df, index, index_label, primary_index=primary_index)
+
+    @classmethod
+    @collect_queryband(queryband="DF_fromDict")
+    def from_dict(cls, data, columns=None):
+        """
+        DESCRIPTION:
+            Creates a DataFrame from a dictionary containing values as lists or numpy arrays.
+
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the Python dictionary to create a teradataml DataFrame.
+                Notes:
+                    * Keys of the dictionary are used as column names.
+                    * Values of the dictionary should be lists or numpy arrays.
+                    * Nested dictionaries are not supported.
+                Types: dict
+
+            columns:
+                Optional Argument.
+                Specifies the column names for the DataFrame.
+                Types: str OR list of str
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame
+            >>> data_dict = {"name": ["Alice", "Bob", "Charlie"], "age": [25, 30, 28]}
+
+            # Example 1: Create a teradataml DataFrame from a dictionary where
+            #            keys are column names and values are lists of column data.
+            >>> df = DataFrame.from_dict(data_dict)
+            >>> df
+                  name  age
+            0  Charlie   28
+            1      Bob   30
+            2    Alice   25
+
+            # Example 2: Create a teradataml DataFrame from a dictionary where
+            #            keys are column names and values are numpy arrays.
+            >>> import numpy as np
+            >>> data_dict = {"col1": np.array([1, 2, 3]), "col2": np.array([4, 5, 6])}
+            >>> df = DataFrame.from_dict(data_dict)
+            >>> df
+               col1  col2
+            0     3     6
+            1     2     5
+            2     1     4
+        """
+        arg_type_matrix = []
+        arg_type_matrix.append(["data", data, False, (dict), True])
+        arg_type_matrix.append(["columns", columns, True, (str, list), True])
+
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        return cls(data, columns=columns, index=False)
+
+    @classmethod
+    @collect_queryband(queryband="DF_fromRecords")
+    def from_records(cls, data, columns=None, **kwargs):
+        """
+        DESCRIPTION:
+            Create a DataFrame from a list of lists/tuples/dictionaries/numpy arrays.
+
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the iterator of data or the list of lists/tuples/dictionaries/numpy arrays to
+                be converted to teradataml DataFrame.
+                Note:
+                    * Nested lists or tuples or dictionaries are not supported.
+                Types: Iterator, list
+
+            columns:
+                Optional Argument.
+                Specifies the column names for the DataFrame.
+                Note:
+                    * If the data is a list of lists/tuples/numpy arrays and this argument
+                      is not specified, column names will be auto-generated as 'col_0', 'col_1', etc.
+                Types: str OR list of str
+
+            kwargs:
+                exclude:
+                    Optional Argument.
+                    Specifies the columns to be excluded from the DataFrame.
+                    Types: list OR tuple
+
+                coerce_float:
+                    Optional Argument.
+                    Specifies whether to convert values of non-string, non-numeric objects (like decimal.Decimal)
+                    to floating point, useful for SQL result sets.
+                    Default Value: True
+                    Types: bool
+
+                nrows:
+                    Optional Argument.
+                    Specifies the number of rows to be read from the data if the data is iterator.
+                    Types: int
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame
+
+            # Example 1: Create a teradataml DataFrame from a list of lists.
+            >>> df = DataFrame.from_records([['Alice', 1], ['Bob', 2]], columns=['name', 'age'])
+            >>> df
+                name  age
+            0    Bob    2
+            1  Alice    1
+
+            # Example 2: Create a teradataml DataFrame from a list of tuples.
+            >>> df = DataFrame.from_records([('Alice', 1), ('Bob', 3)], columns=['name', 'age'])
+            >>> df
+                name  age
+            0    Bob    3
+            1  Alice    1
+
+            # Example 3: Create a teradataml DataFrame from a list of dictionaries.
+            >>> df = DataFrame.from_records([{'name': 'Alice', 'age': 4}, {'name': 'Bob', 'age': 2}])
+            >>> df
+                name  age
+            0    Bob    2
+            1  Alice    4
+
+            # Example 4: Create a teradataml DataFrame from a list where columns
+            #            are not explicitly defined.
+            >>> df = DataFrame.from_records([['Alice', 1], ['Bob', 2]])
+            >>> df
+               col_0  col_1
+            0    Bob      2
+            1  Alice      1
+
+            # Example 5: Create a teradataml DataFrame from a list by excluding 'grade' column.
+            >>> df = DataFrame.from_records([['Alice', 1, 'A'], ['Bob', 2, 'B']],
+            ...                             columns=['name', 'age', 'grade'],
+            ...                             exclude=['grade'])
+            >>> df
+                name  age
+            0    Bob    2
+            1  Alice    1
+
+            # Example 6: Create a teradataml DataFrame from a list of lists
+            #            with "coerce_float" set to False.
+            >>> df = DataFrame.from_records([[1, Decimal('2.5')], [3, Decimal('4.0')]],
+            ...                             columns=['col1', 'col2'], coerce_float=False)
+            >>> df
+               col1 col2
+            0     3  4.0
+            1     1  2.5
+            >>> df.tdtypes
+            col1                                    BIGINT()
+            col2    VARCHAR(length=1024, charset='UNICODE')
+
+            # Example 7: Create a teradataml DataFrame from a list of lists
+            #            with "coerce_float" set to True.
+            >>> from decimal import Decimal
+            >>> df = DataFrame.from_records([[1, Decimal('2.5')], [3, Decimal('4.0')]],
+            ...                             columns=['col1', 'col2'], coerce_float=True)
+            >>> df
+               col1  col2
+            0     3   4.0
+            1     1   2.5
+            >>> df.tdtypes
+            col1    BIGINT()
+            col2     FLOAT()
+
+            # Example 8: Create a teradataml DataFrame from an iterator with "nrows" set to 2.
+            >>> def data_gen():
+            ...     yield ['Alice', 1]
+            ...     yield ['Bob', 2]
+            ...     yield ['Charlie', 3]
+            >>> df = DataFrame.from_records(data_gen(), columns=['name', 'age'], nrows=2)
+            >>> df
+                name  age
+            0    Bob    2
+            1  Alice    1
+        """
+
+        exclude = kwargs.get("exclude", None)
+        coerce_float = kwargs.get("coerce_float", True)
+        nrows = kwargs.get("nrows", None)
+
+        arg_type_matrix = []
+        dtypes = (list, tuple, dict)
+        arg_type_matrix.append(["data", data, False, (Iterator, _ListOf(dtypes)), True])
+        arg_type_matrix.append(["columns", columns, True, (str, _ListOf(str)), True])
+        arg_type_matrix.append(["exclude", exclude, True, (_ListOf(str),), True])
+        arg_type_matrix.append(["coerce_float", coerce_float, True, (bool, ), True])
+        arg_type_matrix.append(["nrows", nrows, True, (int,), True])
+
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        if isinstance(columns, str):
+            columns = [columns]
+
+        df = pd.DataFrame.from_records(data, columns=columns, exclude=exclude,
+                                       coerce_float=coerce_float, nrows=nrows)
+        return cls(df, index=False)
+
     def create_temp_view(self, name):
         """
         DESCRIPTION:
```
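A condensed, hedged quick reference for the three classmethods added in this hunk, distilled from the docstring examples above (assumes an active Vantage context):

```python
import pandas as pd
from teradataml import DataFrame

pdf = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})

df1 = DataFrame.from_pandas(pdf, index=False)                 # pandas -> temp table
df2 = DataFrame.from_dict({"name": ["Alice", "Bob"], "age": [25, 30]})
df3 = DataFrame.from_records([("Alice", 1), ("Bob", 2)], columns=["name", "age"])
```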
```diff
@@ -1144,9 +1611,19 @@ class DataFrame():
                                                  datalake=self._datalake)
 
         # Extract column names and corresponding teradatasqlalchemy types.
-        …
-        …
-        …
+        try:
+            # For latest OTF help table query results.
+            col_names, col_types = df_utils._get_datalake_table_columns_info(self._database,
+                                                                             self._table,
+                                                                             self._datalake,
+                                                                             use_dialect=True)
+        except NoSuchColumnError:
+            # For older OTF help table query result.
+            col_names, col_types = df_utils._get_datalake_table_columns_info(self._database,
+                                                                             self._table,
+                                                                             self._datalake)
+
+        # Create a SQLAlchemy table object representing datalake table.
         t = sqlalchemy.Table(self._table, meta, schema=self._database,
             *(Column(col_name, col_type) for col_name, col_type in zip(col_names, col_types)))
         return _MetaExpression(t)
@@ -2924,9 +3401,8 @@ class DataFrame():
             msg = Messages.get_message(errcode)
             raise TeradataMlException(msg, errcode)
 
-    @argument_deprecation("20.0.0.5", "include", False, None)
     @collect_queryband(queryband="DF_describe")
-    def describe(self, percentiles=[.25, .5, .75], …
+    def describe(self, percentiles=[.25, .5, .75], verbose=False, distinct=False, statistics=None,
                  columns=None, pivot=False):
         """
         DESCRIPTION:
@@ -2956,18 +3432,6 @@ class DataFrame():
                 Default Values: [.25, .5, .75], which returns the 25th, 50th, and 75th percentiles.
                 Types: float or List of floats
 
-            include:
-                Optional Argument.
-                Values can be either None or "all".
-                If the value is "all", both numeric and non-numeric columns are included.
-                Computes count, mean, std, min, percentiles, and max for numeric columns.
-                Computes count and unique for non-numeric columns.
-                If the value is None, only numeric columns are used for collecting statistics.
-                Note:
-                    * Value 'all' is not applicable for 'Time Series Aggregate Mode'.
-                Default Values: None
-                Types: str
-
             verbose:
                 Optional Argument.
                 Specifies a boolean value to be used for time series aggregation, stating whether to get
@@ -2994,7 +3458,6 @@ class DataFrame():
                 Computes count and unique for non-numeric columns.
                 Notes:
                     1. statistics is not applicable for 'Time Series Aggregate Mode'.
-                    2. statistics should not be used with include as 'all'.
                 Permitted Values: count, mean, min, max, unique, std, describe, percentile
                 Default Values: None
                 Types: str or List of str
@@ -3310,7 +3773,6 @@ class DataFrame():
         awu_matrix = []
         awu_matrix.append(["columns", columns, True, (str, list), True])
         awu_matrix.append(["percentiles", percentiles, True, (float, list)])
-        awu_matrix.append(["include", include, True, (str), True, [None, "all"]])
        awu_matrix.append(["verbose", verbose, True, (bool)])
         awu_matrix.append(["distinct", distinct, True, (bool)])
         awu_matrix.append(["statistics", statistics, True, (str, list), True,
@@ -3334,22 +3796,11 @@ class DataFrame():
         if statistics:
             statistics = [stats.lower() for stats in UtilFuncs._as_list(statistics)]
 
-        # Argument include and statistics should not be used together
-        if include is not None and statistics is not None:
-            raise ValueError(Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH).format(
-                'include', 'statistics'
-            ))
-
         # Percentiles must be a list of values between 0 and 1.
         if not isinstance(percentiles, list) or not all(p > 0 and p < 1 for p in percentiles):
             raise ValueError(Messages.get_message(MessageCodes.INVALID_ARG_VALUE, percentiles, "percentiles",
                              "percentiles must be a list of values between 0 and 1"))
 
-        # Argument 'include' with value 'all' is not allowed for DataFrameGroupByTime
-        if include is not None and include.lower() == "all" and isinstance(self, DataFrameGroupByTime):
-            raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
-                'include', 'Aggregation', 'all', 'describe()', 'DataFrame or DataFrameGroupBy'))
-
         # Argument 'statistics' is not allowed for DataFrameGroupByTime
         if statistics is not None and isinstance(self, DataFrameGroupByTime):
             raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
@@ -3383,7 +3834,7 @@ class DataFrame():
             # Construct the aggregate query.
             agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
                                                            percentiles=percentiles, function_label=function_label,
-                                                           groupby_column_list=groupby_column_list, include=…
+                                                           groupby_column_list=groupby_column_list, include=None,
                                                            is_time_series_aggregate=True, verbose=verbose,
                                                            distinct=distinct,
                                                            timebucket_duration=self._timebucket_duration,
@@ -3414,7 +3865,7 @@ class DataFrame():
             # Construct the aggregate query.
             agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
                                                            percentiles=percentiles, function_label=function_label,
-                                                           groupby_column_list=groupby_column_list, include=…
+                                                           groupby_column_list=groupby_column_list, include=None,
                                                            is_time_series_aggregate=False, verbose=verbose,
                                                            distinct=distinct, statistics=statistics)
```
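These hunks remove the describe() "include" argument, deprecated in 20.0.0.5 per the dropped @argument_deprecation decorator, and hard-wire include=None into the query construction. A hedged migration sketch, based on the remaining docstring text above ("statistics" computes count and unique for non-numeric columns); exact output depends on your data:

```python
from teradataml import DataFrame

df = DataFrame("admissions_train")

# Before (<= 20.0.0.4): df.describe(include="all")
# After (20.0.0.6): request the non-numeric summaries explicitly.
df.describe()                                  # numeric columns, default percentiles
df.describe(statistics=["count", "unique"])    # count/unique, non-numeric columns included
```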
```diff
@@ -5570,8 +6021,10 @@ class DataFrame():
                 Specifies the function(s) to apply on DataFrame columns.
 
                 Valid values for func are:
-                    'count', 'sum', 'min', 'max', 'mean', 'std', 'percentile', 'unique',
-                    …
+                    * 'count', 'sum', 'min', 'max', 'mean', 'std', 'percentile', 'percentile_<floatvalue>', 'unique',
+                      'median', 'var'
+                    * Note: In 'percentile_<floatvalue>', <floatvalue> specifies the desired percentile value to
+                      calculate aggregate. It should be in the range of 0.0 to 1.0 (both inclusive).
 
                 Acceptable formats for function(s) are
                 string, dictionary, list of strings/functions/ColumnExpression or ColumnExpression.
@@ -5605,12 +6058,17 @@ class DataFrame():
                    Output column names after the above operation are:
                    min_employee_no, sum_employee_no, var_employee_no, min_first_name
 
-                4. "…
+                4. "percentile_<floatvalue>" passed to agg.
+                   >>> df.agg({'employee_no' : ['percentile_0.25', 'percentile_0.75', 'min']})
+                   >>> df.agg(['percentile_0.25', 'percentile_0.75', 'sum'])
+                   >>> df.agg('percentile_0.25')
+
+                5. "func" passed as a ColumnExpression built using the aggregate functions.
                    >>> df.agg(df.first_name.count())
                    Output column name after the above operation is:
                    count(first_name)
 
-                …
+                6. "func" passed as a list of ColumnExpression built using the aggregate functions.
                    >>> df.agg([df.employee_no.min(), df.first_name.count()])
                    Output column names after the above operation are:
                    min(employee_no), count(first_name)
@@ -5698,6 +6156,12 @@ class DataFrame():
                min_employee_no  sum_employee_no  var_employee_no  min_first_name
             0              100              313        44.333333            abcd
 
+            # Get the minimum, 25 percentile value and variance of employee number, by passing dictionary of
+            # column names to string function/list of string functions as parameter.
+            >>> df.agg({'employee_no' : ['min', 'percentile_0.25', 'var']})
+               min_employee_no  percentile_0.25_employee_no  var_employee_no
+            0              100                          100        44.333333
+
             # Get the minimum and sum of all the columns in the dataframe,
             # by passing list of string functions as parameter.
             >>> df.agg(['min', 'sum'])
@@ -5743,9 +6207,15 @@ class DataFrame():
                mean_employee_no  unique_employee_no  unique_first_name  mean_joined_date  unique_joined_date
             0        104.333333                   3                  2          60/12/04                   2
 
+            # Get the percentile of each column in the dataframe with default value 0.5.
             >>> df.agg('percentile')
-            …
-            …
+               percentile_employee_no  percentile_marks
+            0                     101              None
+
+            # Get 80 percentile of each column in the datafame.
+            >>> df.agg('percentile_0.8')
+               percentile_0.8_employee_no  percentile_0.8_marks
+            0                         107                  None
 
             # Using another table 'sales' (having repeated values) to demonstrate operations
             # 'unique' and 'percentile'.
@@ -5762,9 +6232,11 @@ class DataFrame():
             Blue Inc    90.0    50    95   101  2017-04-01
             Red Inc    200.0   150   140  None  2017-04-01
 
-            …
-            …
-            …
+            # Get 80 and 40 percentile values of each column in the dataframe.
+            >>> df1 = df.select(['Feb', 'Jan', 'Mar', 'Apr'])
+            >>> df1.agg(['percentile_0.8', 'percentile_0.4'])
+               percentile_0.8_Feb  percentile_0.4_Feb  percentile_0.8_Jan  percentile_0.4_Jan  percentile_0.8_Mar  percentile_0.4_Mar  percentile_0.8_Apr  percentile_0.4_Apr
+            0               210.0               200.0                 170                 150                 170                 140                 250                 194
 
             >>> df.agg('unique')
                unique_accounts  unique_Feb  unique_Jan  unique_Mar  unique_Apr  unique_datetime
@@ -5888,8 +6360,11 @@ class DataFrame():
                 groupby_col_names.append(col)
                 groupby_col_types.append(self[col].type)
 
-                if …
-                …
+                include_grouping_columns = True if isinstance(self, DataFrameGroupBy) and \
+                    self._include_grouping_columns else False
+                if not include_grouping_columns and col in col_names:
+                    # If 'include_grouping_columns' argument is set to True and,
+                    # group by column is not specified in the columns argument,
                     # then, we should ignore this processing, otherwise we
                     # should process it in the same way to remove the reference
                     # for grouping column from aggregation list.
@@ -5951,6 +6426,8 @@ class DataFrame():
 
         except TeradataMlException:
             raise
+        except ValueError:
+            raise
         except Exception as err:
             raise TeradataMlException(Messages.get_message(
                 MessageCodes.EXECUTION_FAILED, "perform {} on DataFrame".format(operation), str(err)),
@@ -7760,7 +8237,7 @@ class DataFrame():
         """
         return (type(None), int, float, str, decimal.Decimal, ColumnExpression, ClauseElement)
 
-    def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
+    def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, node_id, **kwargs):
         """
         DESCRIPTION:
             Function generates the MetaExpression and AED nodeid for DataFrame.assign()
@@ -7773,6 +8250,11 @@ class DataFrame():
                 Default Value: False
                 Types: bool
 
+            node_id:
+                Optional Argument.
+                Specifies the input nodeid for the assign operation.
+                Types: str
+
             kwargs:
                 keyword, value pairs
                 - keywords are the column names.
@@ -7800,7 +8282,7 @@ class DataFrame():
 
         # Join the expressions in result.
         assign_expression = ', '.join(list(map(lambda x: x[1], result)))
-        new_nodeid = self._aed_utils._aed_assign(…
+        new_nodeid = self._aed_utils._aed_assign(node_id,
                                                  assign_expression,
                                                  AEDConstants.AED_ASSIGN_DROP_EXISITING_COLUMNS.value)
@@ -7939,7 +8421,7 @@ class DataFrame():
                 env_mapper[env_name] = [colname]
             else:
                 env_mapper[env_name] = udf_expr.keys()
-
+        debug = False
         for env_name, cols in env_mapper.items():
             # Create a dictionary of output columns to column type.
             returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
@@ -7950,6 +8432,7 @@ class DataFrame():
             # Create a dictionary of output column name to udf arguments
             function_args = {}
             for colname, col in udf_expr.items():
+                debug |= col._debug
                 delimiter = col._delimiter
                 quotechar = col._quotechar
                 if colname in cols:
@@ -7982,7 +8465,9 @@ class DataFrame():
                 columns_definitions=columns_definitions,
                 output_type_converters={
                     col_name: _Dtypes._teradata_type_to_python_type(col_type)
-                    for col_name, col_type in returns.items()}
+                    for col_name, col_type in returns.items()},
+                debug=debug
+            )
 
             df = tbl_operators.execute()
         return df
@@ -8624,8 +9109,34 @@ class DataFrame():
         # from udf expression.
         if bool(regular_expr):
             try:
-                …
+                root_node_id = None
+                root_df_col = df.columns
+
+                # Get the previous node type, if it is assign and drop_columns is False,
+                # then check if the previous assign arguments exists and are not present
+                # in either the root dataframe columns or the current assign arguments.
+                # if these conditions are met, obtain the root node id (i.e., the first
+                # node of the assign operation) and merge the previous assign arguments with the current ones.
+
+                prev_node_type = df._aed_utils._aed_get_node_query_type(df._nodeid)
+                if not drop_columns and prev_node_type == "assign" and df._previous_assign_args is not None:
+                    if not df._root_columns & df._previous_assign_args.keys() and \
+                       not df._previous_assign_args.keys() & regular_expr.keys():
+                        # Get the root node id and root dataframe columns.
+                        root_df_col = df._root_columns
+                        root_node_id = df._aed_utils._aed_get_parent_nodeids(df._nodeid)[0]
+                        regular_expr = {**df._previous_assign_args, **regular_expr}
+
+                # If root_node_id is None, assign the current node id as root node of assign operation
+                node_id = root_node_id if root_node_id is not None else df._nodeid
+
+                # Generate new meta expression and node id for the new dataframe.
+                (new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(
+                    drop_columns, node_id = node_id, **regular_expr)
                 df = df._create_dataframe_from_node(new_nodeid, new_meta, df._index_label)
+                df._previous_assign_args = regular_expr
+                df._root_columns = root_df_col
+
             except Exception as err:
                 errcode = MessageCodes.TDMLDF_INFO_ERROR
                 msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
```
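The hunk above folds consecutive assign() calls into a single AED assign node when the previous assign's columns collide with neither the root DataFrame's columns nor the new expressions. A sketch of the user-visible call pattern being optimized (the merge itself is internal and transparent; the derived column names are illustrative):

```python
from teradataml import DataFrame

df = DataFrame("admissions_train")

# Two chained assigns; with this change they can be merged into one
# assign node because 'double_gpa' and 'half_gpa' are new, independent columns.
df = df.assign(double_gpa=df.gpa * 2)
df = df.assign(half_gpa=df.gpa / 2)
```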
@@ -8962,6 +9473,15 @@ class DataFrame():
|
|
|
8962
9473
|
Permitted Values: "CUBE", "ROLLUP", None
|
|
8963
9474
|
Types: str or NoneType
|
|
8964
9475
|
|
|
9476
|
+
include_grouping_columns:
|
|
9477
|
+
Optional Argument.
|
|
9478
|
+
Specifies whether to include aggregations on the grouping column(s) or not.
|
|
9479
|
+
When set to True, the resultant DataFrame will have the aggregations on the
|
|
9480
|
+
columns mentioned in "columns_expr". Otherwise, resultant DataFrame will not have
|
|
9481
|
+
aggregations on the columns mentioned in "columns_expr".
|
|
9482
|
+
Default Value: False
|
|
9483
|
+
Types: bool
|
|
9484
|
+
|
|
8965
9485
|
NOTES:
|
|
8966
9486
|
1. Users can still apply teradataml DataFrame methods (filters/sort/etc) on top of the result.
|
|
8967
9487
|
2. Consecutive operations of grouping, i.e., groupby_time(), resample() and groupby() are not permitted.
|
|
@@ -8978,14 +9498,54 @@ class DataFrame():
             TeradataMlException
 
         EXAMPLES:
+            # Load the data to run the example.
             >>> load_example_data("dataframe","admissions_train")
+
+            # Create a DataFrame on 'admissions_train' table.
             >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            15     yes  4.00  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            38     yes  2.65  Advanced    Beginner         1
+            5       no  3.44    Novice      Novice         0
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+
+            # Example 1: Find the minimum value of all valid columns by
+            #            grouping the DataFrame with column 'masters'.
             >>> df1 = df.groupby(["masters"])
             >>> df1.min()
               masters  min_id  min_gpa min_stats min_programming  min_admitted
             0      no       3     1.87  Advanced        Advanced             0
             1     yes       1     1.98  Advanced        Advanced             0
 
+            # Example 2: Find the sum of all valid columns by grouping the DataFrame
+            #            with columns 'masters' and 'admitted'. Include grouping columns
+            #            in aggregate function 'sum'.
+            >>> df1 = df.groupby(["masters", "admitted"], include_grouping_columns=True)
+            >>> df1.sum()
+              masters  admitted  sum_id  sum_gpa  sum_admitted
+            0     yes         1     188    34.35            10
+            1     yes         0     289    43.36             0
+            2      no         0      41     6.44             0
+            3      no         1     302    57.52            16
+
+            # Example 3: Find the sum of all valid columns by grouping the DataFrame with
+            #            columns 'masters' and 'admitted'. Do not include grouping columns
+            #            in aggregate function 'sum'.
+            >>> df1 = df.groupby(["masters", "admitted"], include_grouping_columns=False)
+            >>> df1.sum()
+              masters  admitted  sum_id  sum_gpa
+            0     yes         0     289    43.36
+            1      no         0      41     6.44
+            2      no         1     302    57.52
+            3     yes         1     188    34.35
         """
         # Argument validations
         arg_info_matrix = []
@@ -8993,6 +9553,8 @@ class DataFrame():
         option = kwargs.get("option", None)
         arg_info_matrix.append(["option", option, True, (str, type(None)), True,
                                 ["CUBE", "ROLLUP", None]])
+        include_grouping_columns = kwargs.get("include_grouping_columns", False)
+        arg_info_matrix.append(["include_grouping_columns", include_grouping_columns, True, (bool)])
 
         # Validate argument types
         _Validators._validate_function_arguments(arg_info_matrix)
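The arg_info_matrix rows above follow an internal convention this diff never spells out. Inferred from the rows themselves, so treat it as an assumption rather than documented API: position 0 is the argument name, 1 the value, 2 whether the argument is optional, 3 the accepted type(s), and the optional trailing entries an empty-check flag plus a permitted-values list. A toy validator that mimics that shape:

    def _toy_validate(rows):
        # Inferred layout: [name, value, is_optional, types, check_empty?, permitted?]
        for row in rows:
            name, value, is_optional, types = row[0], row[1], row[2], row[3]
            if value is None:
                if not is_optional:
                    raise ValueError("{} is required".format(name))
                continue
            if not isinstance(value, types):
                raise TypeError("{} must be of type {}".format(name, types))
            permitted = row[5] if len(row) > 5 else None
            if permitted is not None and value not in permitted:
                raise ValueError("{} must be one of {}".format(name, permitted))

    _toy_validate([["option", "CUBE", True, (str, type(None)), True, ["CUBE", "ROLLUP", None]],
                   ["include_grouping_columns", False, True, bool]])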
@@ -9037,7 +9599,8 @@ class DataFrame():
 
             groupbyexpr = ', '.join(UtilFuncs._teradata_quote_arg(col, "\"", False) for col in column_list)
             groupbyObj = DataFrameGroupBy(self._nodeid, self._metaexpr, self._column_names_and_types, self.columns,
-                                          groupbyexpr, column_list, option)
+                                          groupbyexpr, column_list, option, include_grouping_columns)
+
             return groupbyObj
         except TeradataMlException:
             raise
@@ -11569,6 +12132,10 @@ class DataFrame():
         DESCRIPTION:
             Function to apply a user defined function to each row in the
             teradataml DataFrame, leveraging Vantage's Script Table Operator.
+            Notes:
+                1. The function requires the same Python version in both the Vantage and local environments.
+                2. Teradata recommends using the same version of the "dill" package in both
+                   the Vantage and local environments.
 
         PARAMETERS:
             user_function:
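Because the pickled callable is shipped to Vantage, a mismatch in interpreter or dill versions typically surfaces only at run time. A defensive pre-flight check on the client side might look like the sketch below; the expected remote versions are placeholders you would obtain from your own Vantage environment:

    import sys
    import dill

    # Replace these placeholders with the versions installed on Vantage.
    EXPECTED_PYTHON = (3, 9)
    EXPECTED_DILL = "0.3.8"

    local_python = sys.version_info[:2]
    if local_python != EXPECTED_PYTHON:
        print("Warning: local Python {}.{} differs from Vantage Python {}.{}".format(
            *local_python, *EXPECTED_PYTHON))
    if dill.__version__ != EXPECTED_DILL:
        print("Warning: local dill {} differs from Vantage dill {}".format(
            dill.__version__, EXPECTED_DILL))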
@@ -11749,6 +12316,15 @@ class DataFrame():
                 Default Value: True
                 Types: bool
 
+            debug:
+                Optional Argument.
+                Specifies whether to display the path of the script file generated during
+                function execution; this helps in debugging failures. When set to True,
+                the function displays the path of the script and does not remove the file
+                from the local file system. Otherwise, the file is removed.
+                Default Value: False
+                Types: bool
+
         RETURNS:
             1. teradataml DataFrame if exec_mode is "IN-DB".
             2. Pandas DataFrame if exec_mode is "LOCAL".
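A usage sketch of the new argument, assuming a connected Vantage session and the admissions_train table used elsewhere in this file. The row function here is hypothetical; the exact row type map_row passes is the library's concern, so treat this as a sketch rather than a reference:

    from teradataml import DataFrame, load_example_data

    load_example_data("dataframe", "admissions_train")
    df = DataFrame("admissions_train")

    # Hypothetical row function for illustration only.
    def uppercase_stats(row):
        row['stats'] = row['stats'].upper()
        return row

    # debug=True prints the path of the generated script and leaves the file
    # on the local file system so it can be inspected after a failure.
    result = df.map_row(uppercase_stats, debug=True)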
@@ -11901,6 +12477,7 @@ class DataFrame():
         sort_ascending = kwargs.pop('sort_ascending', True)
         auth = kwargs.pop('auth', None)
         charset = kwargs.pop('charset', None)
+        debug = kwargs.pop('debug', False)
 
         # Check for other extra/unknown arguments.
         unknown_args = list(kwargs.keys())
@@ -11919,7 +12496,7 @@ class DataFrame():
                                           sort_ascending=sort_ascending,
                                           returns=returns, delimiter=delimiter,
                                           quotechar=quotechar, auth=auth,
-                                          charset=charset, num_rows=num_rows)
+                                          charset=charset, num_rows=num_rows, debug=debug)
 
         return tbl_op_util.execute()
 
@@ -11936,6 +12513,10 @@ class DataFrame():
         DESCRIPTION:
             Function to apply a user defined function to a group or partition of rows
             in the teradataml DataFrame, leveraging Vantage's Script Table Operator.
+            Notes:
+                1. The function requires the same Python version in both the Vantage and local environments.
+                2. Teradata recommends using the same version of the "dill" package in both
+                   the Vantage and local environments.
 
         PARAMETERS:
             user_function:
@@ -12146,6 +12727,15 @@ class DataFrame():
                 Default Value: True
                 Types: bool
 
+            debug:
+                Optional Argument.
+                Specifies whether to display the path of the script file generated during
+                function execution; this helps in debugging failures. When set to True,
+                the function displays the path of the script and does not remove the file
+                from the local file system. Otherwise, the file is removed.
+                Default Value: False
+                Types: bool
+
         RETURNS:
             1. teradataml DataFrame if exec_mode is "IN-DB".
             2. Pandas DataFrame if exec_mode is "LOCAL".
@@ -12311,6 +12901,7 @@ class DataFrame():
         sort_ascending = kwargs.pop('sort_ascending', True)
         auth = kwargs.pop('auth', None)
         charset = kwargs.pop('charset', None)
+        debug = kwargs.pop('debug', False)
 
         # Check for other extra/unknown arguments.
         unknown_args = list(kwargs.keys())
@@ -12329,7 +12920,7 @@ class DataFrame():
                                           sort_ascending=sort_ascending,
                                           returns=returns, delimiter=delimiter,
                                           quotechar=quotechar, auth=auth,
-                                          charset=charset, num_rows=num_rows)
+                                          charset=charset, num_rows=num_rows, debug=debug)
 
         return tbl_op_util.execute()
 
@@ -12346,9 +12937,9 @@ class DataFrame():
             teradataml DataFrame, leveraging Apply Table Operator of Open
             Analytics Framework.
             Notes:
-
-
-
+                1. The function requires the same Python version in both the remote and local environments.
+                2. Teradata recommends using the same version of the "dill" package in both
+                   the remote and local environments.
 
         PARAMETERS:
             user_function:
@@ -12531,6 +13122,15 @@ class DataFrame():
                 Default value: "csv"
                 Types: str
 
+            debug:
+                Optional Argument.
+                Specifies whether to display the path of the script file generated during
+                function execution; this helps in debugging failures. When set to True,
+                the function displays the path of the script and does not remove the file
+                from the local file system. Otherwise, the file is removed.
+                Default Value: False
+                Types: bool
+
         RETURNS:
             teradataml DataFrame.
 
@@ -12707,6 +13307,7 @@ class DataFrame():
         is_local_order = kwargs.pop('is_local_order', False)
         nulls_first = kwargs.pop('nulls_first', True)
         sort_ascending = kwargs.pop('sort_ascending', True)
+        debug = kwargs.pop('debug', False)
 
         # Check for other extra/unknown arguments.
         unknown_args = list(kwargs.keys())
@@ -12729,7 +13330,8 @@ class DataFrame():
                                           charset=None,
                                           num_rows=num_rows,
                                           env_name=env_name,
-                                          style=style)
+                                          style=style,
+                                          debug=debug)
 
         return tbl_op_util.execute()
 
@@ -13075,7 +13677,7 @@ class DataFrame():
                                                              False)
         column_names = list(dict.fromkeys(column_names))
 
-        if list_td_reserved_keywords(column_names) or UtilFuncs.
+        if list_td_reserved_keywords(column_names) or UtilFuncs._is_non_ascii(column_names):
             column_names = UtilFuncs._teradata_quote_arg(column_names, "\"", False)
 
         col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, column_names)
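The completed condition quotes a column name whenever it collides with a Teradata reserved keyword or contains non-ASCII characters; without quoting, the generated SQL would be ambiguous or invalid. A minimal illustration of the quoting rule, where the helper is a stand-in and not the library's internal API:

    RESERVED = {"SELECT", "ORDER", "USER"}  # tiny illustrative subset

    def quote_if_needed(name):
        needs_quoting = name.upper() in RESERVED or not name.isascii()
        return '"{}"'.format(name) if needs_quoting else name

    print(quote_if_needed("order"))   # "order"
    print(quote_if_needed("straße"))  # "straße"
    print(quote_if_needed("gpa"))     # gpa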
@@ -15261,7 +15863,7 @@ class DataFrame():
         return self.assign(**new_columns, drop_columns=True).select(self.columns)
 
     @collect_queryband(queryband="DF_cube")
-    def cube(self, columns):
+    def cube(self, columns, include_grouping_columns=False):
         """
         DESCRIPTION:
             cube() function creates a multi-dimensional cube for the DataFrame
@@ -15275,6 +15877,15 @@ class DataFrame():
                 Specifies the name(s) of input teradataml DataFrame column(s).
                 Types: str OR list of str(s)
 
+            include_grouping_columns:
+                Optional Argument.
+                Specifies whether to include aggregations on the grouping column(s) or not.
+                When set to True, the resultant DataFrame will have the aggregations on the
+                columns mentioned in "columns". Otherwise, the resultant DataFrame will not
+                have aggregations on the columns mentioned in "columns".
+                Default Value: False
+                Types: bool
+
         RETURNS:
             teradataml DataFrameGroupBy
 
@@ -15282,9 +15893,27 @@ class DataFrame():
             TeradataMlException
 
         EXAMPLES :
-            #
+            # Load the data to run the example.
             >>> load_example_data("dataframe","admissions_train")
+
+            # Create a DataFrame on 'admissions_train' table.
             >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            15     yes  4.00  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            38     yes  2.65  Advanced    Beginner         1
+            5       no  3.44    Novice      Novice         0
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+
+            # Example 1: Find the sum of all valid columns by grouping the
+            #            DataFrame columns with 'masters' and 'stats'.
             >>> df1 = df.cube(["masters", "stats"]).sum()
             >>> df1
               masters     stats  sum_id  sum_gpa  sum_admitted
@@ -15299,10 +15928,42 @@ class DataFrame():
             8      no  Advanced     189    34.95             9
             9     yes    Novice      98    13.74             1
 
+            # Example 2: Find the avg of all valid columns by grouping the DataFrame
+            #            with columns 'masters' and 'admitted'. Include grouping columns
+            #            in aggregate function 'avg'.
+            >>> df1 = df.cube(["masters", "admitted"], include_grouping_columns=True).avg()
+            >>> df1
+              masters  admitted     avg_id   avg_gpa  avg_admitted
+            0     yes       NaN  21.681818  3.532273      0.454545
+            1    None       1.0  18.846154  3.533462      1.000000
+            2      no       NaN  19.055556  3.553333      0.888889
+            3     yes       0.0  24.083333  3.613333      0.000000
+            4    None       NaN  20.500000  3.541750      0.650000
+            5    None       0.0  23.571429  3.557143      0.000000
+            6     yes       1.0  18.800000  3.435000      1.000000
+            7      no       1.0  18.875000  3.595000      1.000000
+            8      no       0.0  20.500000  3.220000      0.000000
+
+            # Example 3: Find the avg of all valid columns by grouping the DataFrame with
+            #            columns 'masters' and 'admitted'. Do not include grouping columns
+            #            in aggregate function 'avg'.
+            >>> df1 = df.cube(["masters", "admitted"], include_grouping_columns=False).avg()
+            >>> df1
+              masters  admitted     avg_id   avg_gpa
+            0      no       0.0  20.500000  3.220000
+            1    None       1.0  18.846154  3.533462
+            2      no       NaN  19.055556  3.553333
+            3     yes       0.0  24.083333  3.613333
+            4    None       NaN  20.500000  3.541750
+            5    None       0.0  23.571429  3.557143
+            6     yes       1.0  18.800000  3.435000
+            7     yes       NaN  21.681818  3.532273
+            8      no       1.0  18.875000  3.595000
         """
         # Validate columns argument.
         arg_info_matrix = []
         arg_info_matrix.append(["columns", columns, False, (str, list), True])
+        arg_info_matrix.append(["include_grouping_columns", include_grouping_columns, False, bool])
 
         # Validate argument types
         _Validators._validate_function_arguments(arg_info_matrix)
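The next hunk confirms that cube() is a thin wrapper over groupby() with option="cube", so the result shape in Examples 2 and 3 above comes straight from Teradata's GROUP BY CUBE clause. A sketch of the SQL shape this produces for Example 2; this follows standard CUBE syntax and is not the library's literal output:

    # Roughly the query cube(["masters", "admitted"], include_grouping_columns=True).avg()
    # resolves to; column quoting and aliasing follow the library's own rules.
    query = ('SELECT masters, admitted, AVG(id) AS avg_id, AVG(gpa) AS avg_gpa, '
             'AVG(admitted) AS avg_admitted '
             'FROM "admissions_train" '
             'GROUP BY CUBE ("masters", "admitted")')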
@@ -15312,10 +15973,10 @@ class DataFrame():
 
         # Query generation of cube API is same as the group by.
         # Only 'cube' is concatenated with 'group by' clause.
-        return self.groupby(columns, option="cube")
+        return self.groupby(columns, option="cube", include_grouping_columns=include_grouping_columns)
 
     @collect_queryband(queryband="DF_rollup")
-    def rollup(self, columns):
+    def rollup(self, columns, include_grouping_columns=False):
         """
         DESCRIPTION:
             rollup() function creates a multi-dimensional rollup for the DataFrame
@@ -15329,6 +15990,15 @@ class DataFrame():
                 Specifies the name(s) of input teradataml DataFrame column(s).
                 Types: str OR list of str(s)
 
+            include_grouping_columns:
+                Optional Argument.
+                Specifies whether to include aggregations on the grouping column(s) or not.
+                When set to True, the resultant DataFrame will have the aggregations on the
+                columns mentioned in "columns". Otherwise, the resultant DataFrame will not
+                have aggregations on the columns mentioned in "columns".
+                Default Value: False
+                Types: bool
+
         RETURNS:
             teradataml DataFrameGroupBy
 
@@ -15336,9 +16006,27 @@ class DataFrame():
             TeradataMlException
 
         EXAMPLES :
-            #
+            # Load the data to run the example.
             >>> load_example_data("dataframe","admissions_train")
+
+            # Create a DataFrame on 'admissions_train' table.
             >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            15     yes  4.00  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            38     yes  2.65  Advanced    Beginner         1
+            5       no  3.44    Novice      Novice         0
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+
+            # Example 1: Find the sum of all valid columns by grouping the
+            #            DataFrame columns with 'masters' and 'stats'.
             >>> df1 = df.rollup(["masters", "stats"]).sum()
             >>> df1
               masters     stats  sum_id  sum_gpa  sum_admitted
@@ -15351,11 +16039,39 @@ class DataFrame():
             6     yes  Beginner      13    14.71             2
             7     yes  Advanced     366    49.26             7
             8      no  Advanced     189    34.95             9
-
+
+            # Example 2: Find the avg of all valid columns by grouping the DataFrame
+            #            with columns 'masters' and 'admitted'. Include grouping columns
+            #            in aggregate function 'avg'.
+            >>> df1 = df.rollup(["masters", "admitted"], include_grouping_columns=True).avg()
+            >>> df1
+              masters  admitted     avg_id   avg_gpa  avg_admitted
+            0      no       NaN  19.055556  3.553333      0.888889
+            1     yes       NaN  21.681818  3.532273      0.454545
+            2    None       NaN  20.500000  3.541750      0.650000
+            3     yes       0.0  24.083333  3.613333      0.000000
+            4      no       1.0  18.875000  3.595000      1.000000
+            5     yes       1.0  18.800000  3.435000      1.000000
+            6      no       0.0  20.500000  3.220000      0.000000
+
+            # Example 3: Find the avg of all valid columns by grouping the DataFrame with
+            #            columns 'masters' and 'admitted'. Do not include grouping columns
+            #            in aggregate function 'avg'.
+            >>> df1 = df.rollup(["masters", "admitted"], include_grouping_columns=False).avg()
+            >>> df1
+              masters  admitted     avg_id   avg_gpa
+            0      no       NaN  19.055556  3.553333
+            1     yes       NaN  21.681818  3.532273
+            2      no       0.0  20.500000  3.220000
+            3     yes       0.0  24.083333  3.613333
+            4      no       1.0  18.875000  3.595000
+            5     yes       1.0  18.800000  3.435000
+            6    None       NaN  20.500000  3.541750
         """
         # Validate columns argument.
         arg_info_matrix = []
         arg_info_matrix.append(["columns", columns, False, (str, list), True])
+        arg_info_matrix.append(["include_grouping_columns", include_grouping_columns, False, bool])
 
         # Validate argument types
         _Validators._validate_function_arguments(arg_info_matrix)
@@ -15365,8 +16081,255 @@ class DataFrame():
 
         # Query generation of cube API is same as the group by.
         # Only 'rollup' is concatenated with 'group by' clause.
-        return self.groupby(columns, option="rollup")
+        return self.groupby(columns, option="rollup", include_grouping_columns=include_grouping_columns)
+
+    # Metadata functions for a DataFrame created on a datalake/OTF table.
+    @property
+    @collect_queryband(queryband="DF_snpsht")
+    @df_utils.check_otf_dataframe()
+    def snapshots(self):
+        """
+        DESCRIPTION:
+            Gets snapshot information for a DataLake table.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame.
 
+        RAISES:
+            TeradataMLException.
+
+        EXAMPLES:
+            # Example 1: Get the snapshot information for a datalake table.
+            >>> from teradataml.dataframe.dataframe import in_schema
+            >>> in_schema_tbl = in_schema(schema_name="datalake_db",
+            ...                           table_name="datalake_table",
+            ...                           datalake_name="datalake")
+            >>> datalake_df = DataFrame(in_schema_tbl)
+            >>> datalake_df.snapshots
+                          snapshotId    snapshotTimestamp  timestampMSecs                                        manifestList                                             summary
+            0    6373759902296319074  2023-06-15 00:07:47   1686787667420  s3://vim-iceberg-v1/glue/metadata/snap-6373759...  {"added-data-files":"1","added-records":"5","a...}
+            1    4768076782814510171  2023-06-15 00:09:01   1686787741964  s3://vim-iceberg-v1/glue/metadata/snap-4768076...  {"added-data-files":"1","added-records":"2","a...}
+            2    7771482207931850214  2024-05-29 04:59:09   1716958749946  s3://vim-iceberg-v1/glue/metadata/snap-7771482...  {"deleted-data-files":"2","deleted-records":"7...}
+            3    1545363077953282623  2024-05-29 05:13:39   1716959619455  s3://vim-iceberg-v1/glue/metadata/snap-1545363...  {"changed-partition-count":"0","total-records"...}
+            4    2166707884289108360  2024-05-29 05:17:49   1716959869075  s3://vim-iceberg-v1/glue/metadata/snap-2166707...  {"changed-partition-count":"0","total-records"...}
+            5    8934190131471882700  2024-05-29 05:21:32   1716960092422  s3://vim-iceberg-v1/glue/metadata/snap-8934190...  {"changed-partition-count":"0","total-records"...}
+            6    3086605171258231948  2024-05-29 05:34:43   1716960883786  s3://vim-iceberg-v1/glue/metadata/snap-3086605...  {"changed-partition-count":"0","total-records"...}
+            7    7592503716012384122  2024-05-29 06:04:48   1716962688047  s3://vim-iceberg-v1/glue/metadata/snap-7592503...  {"changed-partition-count":"0","total-records"...}
+            8    2831061717890032890  2024-06-04 17:21:01   1717521661689  s3://vim-iceberg-v1/glue/metadata/snap-2831061...  {"added-data-files":"2","added-records":"7","a...}
+            9    8810491341502972715  2024-10-22 23:47:22   1729640842067  s3://vim-iceberg-v1/glue/metadata/snap-8810491...  {"added-data-files":"1","added-records":"1","a...}
+            10   3953136136558551163  2024-12-03 04:40:48   1733200848733  s3://vim-iceberg-v1/glue/metadata/snap-3953136...  {"added-data-files":"1","added-records":"4","a...}
+            11   6034775168901969481  2024-12-03 04:40:49   1733200849966  s3://vim-iceberg-v1/glue/metadata/snap-6034775...  {"deleted-data-files":"1","deleted-records":"5...}
+        """
+        return self._execute_metadata_query_and_generate_dataframe("TD_SNAPSHOTS")
+
+    @property
+    @collect_queryband(queryband="DF_prttns")
+    @df_utils.check_otf_dataframe()
+    def partitions(self):
+        """
+        DESCRIPTION:
+            Gets partition information for a DataLake table.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMLException.
+
+        EXAMPLES:
+            # Example 1: Get the partition information for a datalake table.
+            >>> from teradataml.dataframe.dataframe import in_schema
+            >>> in_schema_tbl = in_schema(schema_name="datalake_db",
+            ...                           table_name="datalake_table",
+            ...                           datalake_name="datalake")
+            >>> datalake_df = DataFrame(in_schema_tbl)
+            >>> datalake_df.partitions
+                 id name
+            0  1000   c2
+            1  1001   c3
+
+        """
+        return self._execute_metadata_query_and_generate_dataframe("TD_PARTITIONS")
+
+    @property
+    @collect_queryband(queryband="DF_mnfsts")
+    @df_utils.check_otf_dataframe()
+    def manifests(self):
+        """
+        DESCRIPTION:
+            Gets manifest information for a DataLake table.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMLException.
+
+        EXAMPLES:
+            # Example 1: Get the manifest information for a datalake table.
+            >>> from teradataml.dataframe.dataframe import in_schema
+            >>> in_schema_tbl = in_schema(schema_name="datalake_db",
+            ...                           table_name="datalake_table",
+            ...                           datalake_name="datalake")
+            >>> datalake_df = DataFrame(in_schema_tbl)
+            >>> datalake_df.manifests
+                        snapshotId    snapshotTimestamp                                manifestList                                manifestFile  manifestFileLength  datafilecount  totalrowcount
+            0  8068130797628952520  2025-05-02 11:45:26  s3://vim-iceberg-v1/otftestdb/nt_sales/...  s3://vim-iceberg-v1/otftestdb/nt_sales/...                7158              6              6
+        """
+        return self._execute_metadata_query_and_generate_dataframe("TD_MANIFESTS")
+
+    @property
+    @collect_queryband(queryband="DF_hstry")
+    @df_utils.check_otf_dataframe()
+    def history(self):
+        """
+        DESCRIPTION:
+            Gets the snapshot history related to a DataLake table.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMLException.
+
+        EXAMPLES:
+            # Example 1: Get the snapshot history for a datalake table.
+            >>> from teradataml.dataframe.dataframe import in_schema
+            >>> in_schema_tbl = in_schema(schema_name="datalake_db",
+            ...                           table_name="datalake_table",
+            ...                           datalake_name="datalake")
+            >>> datalake_df = DataFrame(in_schema_tbl)
+            >>> datalake_df.history
+                                id            timestamp
+            0  8068130797628952520  2025-05-02 11:45:26
+        """
+        return self._execute_metadata_query_and_generate_dataframe("TD_HISTORY")
+
+    def _execute_metadata_query_and_generate_dataframe(self, func_name):
+        """Executes an OTF metadata query and returns the result as a DataFrame."""
+        query = SQLBundle()._get_sql_query(SQLConstants.SQL_TD_OTF_METADATA).format(func_name,
+                                                                                    self._table_name)
+        return DataFrame.from_query(query)
+
+    @collect_queryband(queryband="DF_gt_snpsht")
+    @df_utils.check_otf_dataframe()
+    def get_snapshot(self, as_of):
+        """
+        DESCRIPTION:
+            Gets the data from a DataLake table for the given snapshot id or timestamp string.
+            Notes:
+                * The snapshot id can be obtained from the 'snapshots' property of the DataFrame.
+                * The time travel value represented by "as_of" should be in the format
+                  "YYYY-MM-DD HH:MM:SS.FFFFFFF" for a TIMESTAMP string or "YYYY-MM-DD" for a DATE string.
+
+        PARAMETERS:
+            as_of:
+                Required Argument.
+                Specifies the snapshot id or timestamp information for which the snapshot is to be fetched.
+                Types: str or int
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMLException.
+
+        EXAMPLES:
+            # DataFrame creation on an OTF table.
+            >>> from teradataml.dataframe.dataframe import in_schema
+            >>> in_schema_tbl = in_schema(schema_name="datalake_db",
+            ...                           table_name="datalake_table",
+            ...                           datalake_name="datalake")
+            >>> datalake_df = DataFrame(in_schema_tbl)
+
+            # List the snapshots first.
+            >>> datalake_df.snapshots
+                         snapshotId    snapshotTimestamp  timestampMSecs                                                          manifestList                                             summary
+            2046682612111137809  2025-06-03 13:26:15  1748957175692  s3://vim-iceberg-v1/datalake_db/datalake_table/metadata/snap-204...  {"added-data-files":"Red Inc","added-records"...}
+            282293708812257203   2025-06-03 05:53:19  1748929999245  s3://vim-iceberg-v1/datalake_db/datalake_table/metadata/snap-282...  {"added-data-files":"Blue Inc","added-records"...}
+
+            # Example 1: Get the snapshot using a snapshot id.
+            >>> datalake_df.get_snapshot(2046682612111137809)
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+
+            # Example 2: Get the snapshot using a snapshot id in string format.
+            >>> datalake_df.get_snapshot("2046682612111137809")
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+
+            # Example 3: Get the snapshot using a timestamp string.
+            >>> datalake_df.get_snapshot("2025-06-03 13:26:16")
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+
+            # Example 4: Get the snapshot using a date string.
+            >>> datalake_df.get_snapshot("2025-06-04")
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+
+        """
+        _Validators._validate_function_arguments([["as_of", as_of, False, (int, str)]])
+
+        # If as_of is already an int or a string representation of an int, quote it as-is.
+        if isinstance(as_of, int) or (isinstance(as_of, str) and as_of.isdigit()):
+            snapshot_on = "'{}'".format(as_of)
+        else:
+            try:
+                snapshot_on = UtilFuncs._get_time_formatted_string(as_of)
+            except ValueError as e:
+                raise TeradataMlException(Messages.get_message(
+                    MessageCodes.FUNC_EXECUTION_FAILED,
+                    "get_snapshot", "Invalid value for 'as_of' argument: {}. "
+                    "Use a valid format [\"YYYY-MM-DD HH:MM:SS.FFFFFFF\", \"YYYY-MM-DD HH:MM:SS\", "
+                    "\"YYYY-MM-DD\"]".format(as_of)),
+                    MessageCodes.FUNC_EXECUTION_FAILED)
+
+        query = SQLBundle()._get_sql_query(SQLConstants.SQL_TD_OTF_SNAPSHOT).format(self._table_name, snapshot_on)
+
+        try:
+            return DataFrame.from_query(query)
+        except TeradataMlException as e:
+            raise TeradataMlException(Messages.get_message(
+                MessageCodes.FUNC_EXECUTION_FAILED,
+                "get_snapshot()", "Invalid value for 'as_of' argument: {}. "
+                "Use a valid timestamp or a snapshot id listed by the 'snapshots' property.".format(as_of)),
+                MessageCodes.FUNC_EXECUTION_FAILED)
 
 class DataFrameGroupBy(DataFrame):
     """
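All four metadata accessors and get_snapshot() are gated by df_utils.check_otf_dataframe(), whose body does not appear in this diff. A plausible reading, offered purely as an assumption (including the _datalake_name attribute used here), is a decorator factory that rejects DataFrames not created on a datalake/OTF table before the wrapped function runs:

    import functools

    def check_otf_dataframe():
        # Hypothetical sketch: the real implementation is not shown in this diff.
        def decorator(func):
            @functools.wraps(func)
            def wrapper(self, *args, **kwargs):
                if getattr(self, "_datalake_name", None) is None:
                    raise ValueError(
                        "{}() is supported only for DataFrames created on "
                        "a datalake/OTF table.".format(func.__name__))
                return func(self, *args, **kwargs)
            return wrapper
        return decorator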
@@ -15375,7 +16338,7 @@ class DataFrameGroupBy(DataFrame):
 
     """
 
-    def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupbyexpr, column_list, option=None):
+    def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupbyexpr, column_list, option=None, include_grouping_columns=False):
         """
         init() method for DataFrameGroupBy.
 
@@ -15416,6 +16379,15 @@ class DataFrameGroupBy(DataFrame):
                 Permitted Values: "CUBE", "ROLLUP", None
                 Types: str or NoneType
 
+            include_grouping_columns:
+                Optional Argument.
+                Specifies whether to include aggregations on the grouping column(s) or not.
+                When set to True, the resultant DataFrame will have the aggregations on the
+                columns mentioned in "columns". Otherwise, the resultant DataFrame will not
+                have aggregations on the columns mentioned in "columns".
+                Default Value: False
+                Types: bool
+
         RETURNS:
             teradataml DataFrameGroupBy instance
         """
@@ -15425,6 +16397,7 @@ class DataFrameGroupBy(DataFrame):
         self._column_names_and_types = column_names_and_types
         self._columns = columns
         self.groupby_column_list = column_list
+        self._include_grouping_columns = include_grouping_columns
 
     def _get_assign_allowed_types(self):
         """
@@ -15446,7 +16419,7 @@ class DataFrameGroupBy(DataFrame):
         from sqlalchemy.sql.functions import Function
         return (type(None), int, float, str, decimal.Decimal, Function, ColumnExpression, ClauseElement)
 
-    def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
+    def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, node_id, **kwargs):
         """
         DESCRIPTION:
             Function generates the MetaExpression and AED nodeid for DataFrameGroupBy.assign()
@@ -15459,6 +16432,11 @@ class DataFrameGroupBy(DataFrame):
                 and grouping columns are returned. This is unused argument.
                 Types: bool
 
+            node_id:
+                Optional Argument.
+                Specifies the input nodeid for the assign operation. This argument is unused.
+                Types: str
+
             kwargs:
                 keyword, value pairs
                 - keywords are the column names.