teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (151)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +193 -1
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +25 -18
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  8. teradataml/analytics/sqle/__init__.py +20 -2
  9. teradataml/analytics/utils.py +15 -1
  10. teradataml/analytics/valib.py +18 -4
  11. teradataml/automl/__init__.py +341 -112
  12. teradataml/automl/autodataprep/__init__.py +471 -0
  13. teradataml/automl/data_preparation.py +84 -42
  14. teradataml/automl/data_transformation.py +69 -33
  15. teradataml/automl/feature_engineering.py +76 -9
  16. teradataml/automl/feature_exploration.py +639 -25
  17. teradataml/automl/model_training.py +35 -14
  18. teradataml/clients/auth_client.py +2 -2
  19. teradataml/common/__init__.py +1 -2
  20. teradataml/common/constants.py +122 -63
  21. teradataml/common/messagecodes.py +14 -3
  22. teradataml/common/messages.py +8 -4
  23. teradataml/common/sqlbundle.py +40 -10
  24. teradataml/common/utils.py +366 -74
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +348 -86
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/apriori_example.json +22 -0
  29. teradataml/data/byom_example.json +11 -0
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  37. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  38. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  40. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  41. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  42. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  43. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  45. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  49. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  51. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  52. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  53. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  54. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  55. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  57. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  58. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  59. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  60. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  61. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  62. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  63. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  64. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  65. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  67. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  68. teradataml/data/hnsw_alter_data.csv +5 -0
  69. teradataml/data/hnsw_data.csv +10 -0
  70. teradataml/data/jsons/byom/h2opredict.json +1 -1
  71. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  72. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  73. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  74. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  75. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  76. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  77. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  78. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  79. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  80. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  81. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  82. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  83. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  84. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  85. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  86. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  87. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  88. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  89. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  90. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  91. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  92. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  93. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  94. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
  95. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
  96. teradataml/data/ner_dict.csv +8 -0
  97. teradataml/data/ner_input_eng.csv +7 -0
  98. teradataml/data/ner_rule.csv +5 -0
  99. teradataml/data/pos_input.csv +40 -0
  100. teradataml/data/tdnerextractor_example.json +14 -0
  101. teradataml/data/teradataml_example.json +21 -0
  102. teradataml/data/textmorph_example.json +5 -0
  103. teradataml/data/to_num_data.csv +4 -0
  104. teradataml/data/tochar_data.csv +5 -0
  105. teradataml/data/trans_dense.csv +16 -0
  106. teradataml/data/trans_sparse.csv +55 -0
  107. teradataml/data/vectordistance_example.json +1 -1
  108. teradataml/dataframe/copy_to.py +45 -29
  109. teradataml/dataframe/data_transfer.py +72 -46
  110. teradataml/dataframe/dataframe.py +642 -166
  111. teradataml/dataframe/dataframe_utils.py +167 -22
  112. teradataml/dataframe/functions.py +135 -20
  113. teradataml/dataframe/setop.py +11 -6
  114. teradataml/dataframe/sql.py +330 -78
  115. teradataml/dbutils/dbutils.py +556 -140
  116. teradataml/dbutils/filemgr.py +14 -10
  117. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  118. teradataml/lib/aed_0_1.dll +0 -0
  119. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
  120. teradataml/opensource/_class.py +141 -17
  121. teradataml/opensource/{constants.py → _constants.py} +7 -3
  122. teradataml/opensource/_lightgbm.py +52 -53
  123. teradataml/opensource/_sklearn.py +1008 -0
  124. teradataml/opensource/_wrapper_utils.py +5 -5
  125. teradataml/options/__init__.py +47 -15
  126. teradataml/options/configure.py +103 -26
  127. teradataml/options/display.py +13 -2
  128. teradataml/plot/axis.py +47 -8
  129. teradataml/plot/figure.py +33 -0
  130. teradataml/plot/plot.py +63 -13
  131. teradataml/scriptmgmt/UserEnv.py +307 -40
  132. teradataml/scriptmgmt/lls_utils.py +428 -145
  133. teradataml/store/__init__.py +2 -3
  134. teradataml/store/feature_store/feature_store.py +102 -7
  135. teradataml/table_operators/Apply.py +48 -19
  136. teradataml/table_operators/Script.py +23 -2
  137. teradataml/table_operators/TableOperator.py +3 -1
  138. teradataml/table_operators/table_operator_util.py +58 -9
  139. teradataml/utils/dtypes.py +49 -1
  140. teradataml/utils/internal_buffer.py +38 -0
  141. teradataml/utils/validators.py +377 -62
  142. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
  143. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
  144. teradataml/data/SQL_Fundamentals.pdf +0 -0
  145. teradataml/libaed_0_1.dylib +0 -0
  146. teradataml/libaed_0_1.so +0 -0
  147. teradataml/opensource/sklearn/__init__.py +0 -0
  148. teradataml/store/vector_store/__init__.py +0 -1586
  149. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  150. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  151. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -20,6 +20,9 @@ import re
  import sqlalchemy
  import sys
  import urllib.parse
+
+ from sqlalchemy import Column
+
  import teradataml.context.context as tdmlctx
 
  from collections import OrderedDict, namedtuple
@@ -31,6 +34,7 @@ from teradataml.dataframe.sql_interfaces import ColumnExpression
  from teradataml.dataframe.sql_functions import case
  from teradataml.series.series import Series
  from teradatasqlalchemy.types import _TDType, BIGINT, INTEGER, PERIOD_TIMESTAMP, SMALLINT, BYTEINT, FLOAT, DECIMAL
+ from teradataml.common.deprecations import argument_deprecation
  from teradataml.common.utils import UtilFuncs
  from teradataml.common.exceptions import TeradataMlException
  from teradataml.common.messages import Messages
@@ -42,6 +46,7 @@ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils, Dat
  from teradataml.dataframe.indexer import _LocationIndexer
  from teradataml.common.aed_utils import AedUtils
  from teradataml.options.display import display
+ from teradataml.options.configure import configure
  from teradataml.dataframe.copy_to import copy_to_sql
  from teradataml.dataframe.row import _Row
  from teradataml.dataframe.setop import concat
@@ -63,7 +68,79 @@ from teradataml.common.constants import OutputStyle
 
  # TODO use logger when available on master branch
  # logger = teradatapylog.getLogger()
- in_schema = UtilFuncs._in_schema
+
+ class in_schema:
+ """
+ Class takes a schema name, a table name and a datalake name
+ and creates an object that can be passed to DataFrame.
+ Note:
+ teradataml recommends using this class to access table(s)/view(s)
+ from a database other than the default database.
+ """
+ def __init__(self, schema_name, table_name, datalake_name=None):
+ """
+ Constructor for in_schema class.
+
+ PARAMETERS:
+ schema_name:
+ Required Argument.
+ Specifies the schema where the table resides.
+ Types: str
+
+ table_name:
+ Required Argument.
+ Specifies the table name or view name in Vantage.
+ Types: str
+
+ datalake_name:
+ Optional Argument.
+ Specifies the datalake name.
+ Types: str
+
+ EXAMPLES:
+ from teradataml.dataframe.dataframe import in_schema, DataFrame
+
+ # Example 1: The following example creates a DataFrame from the
+ # existing Vantage table "dbcinfo" in the non-default
+ # database "dbc" using the in_schema instance.
+ df = DataFrame(in_schema("dbc", "dbcinfo"))
+
+ # Example 2: The following example uses from_table() function, existing
+ # Vantage table "dbcinfo" and non-default database "dbc" to
+ # create a teradataml DataFrame.
+ df = DataFrame.from_table(in_schema("dbc","dbcinfo"))
+
+ # Example 3: The following example uses "in_schema" object created
+ # with "datalake_name" argument to create DataFrame on OTF table.
+ otf_df = DataFrame(in_schema("datalake_db","datalake_table","datalake"))
+
+ """
+ self.schema_name = schema_name
+ self.table_name = table_name
+ self.datalake_name = datalake_name
+
+ awu_matrix = []
+ awu_matrix.append(["schema_name", schema_name, False, (str), True])
+ awu_matrix.append(["table_name", table_name, False, (str), True])
+ awu_matrix.append(["datalake_name", datalake_name, True, (str), True])
+
+ # Validate argument types
+ _Validators._validate_function_arguments(awu_matrix)
+
+ def __str__(self):
+ """
+ Returns the string representation of in_schema instance.
+ """
+ tbl_name = '{}.{}'.format(UtilFuncs._teradata_quote_arg(self.schema_name, "\"", False),
+ UtilFuncs._teradata_quote_arg(self.table_name, "\"", False))
+
+ if not self.datalake_name:
+ return tbl_name
+
+ return '{}.{}'.format(UtilFuncs._teradata_quote_arg(self.datalake_name, "\"", False), tbl_name)
+
+
+ in_schema = in_schema
 
 
  class DataFrame():
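A quick illustration of how the __str__ logic above renders an in_schema object. This is a minimal sketch, assuming UtilFuncs._teradata_quote_arg simply wraps each name in double quotes:

>>> from teradataml.dataframe.dataframe import in_schema
>>> # Two-part name when no datalake is given.
>>> str(in_schema("sales_db", "orders"))
'"sales_db"."orders"'
>>> # Three-part name when a datalake is given: "<datalake>"."<schema>"."<table>".
>>> str(in_schema("datalake_db", "datalake_table", "datalake"))
'"datalake"."datalake_db"."datalake_table"'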
@@ -166,6 +243,24 @@ class DataFrame():
  # Property to determine if table is an ART table or not.
  self._is_art = None
 
+ # This attribute stores the previous assign arguments in continuous assign calls.
+ self._previous_assign_args = None
+ # This attribute stores the root DataFrame columns.
+ self._root_columns = None
+
+ self._datalake = None
+ self._database = None
+ self._table = None
+ self._otf = False
+
+ if isinstance(table_name, in_schema):
+ self._table = table_name.table_name
+ self._datalake = table_name.datalake_name
+ self._database = table_name.schema_name
+ self._otf = True if self._datalake else False
+
+ table_name = str(table_name) if isinstance(table_name, in_schema) else table_name
+
  # Below matrix is list of list, where in each row contains following elements:
  # Let's take an example of following, just to get an idea:
  # [element1, element2, element3, element4, element5, element6]
@@ -198,25 +293,45 @@ class DataFrame():
  self._source_type = SourceType.TABLE.value
  self._nodeid = self._aed_utils._aed_table(self._table_name)
  elif query is not None:
+ query = query.strip()
+ query = query[:-1] if query[-1] == ";" else query
+
  self._query = query
  self._source_type = SourceType.QUERY.value
 
- if materialize:
- # If user requests to materialize the the query, then we should create a
+ temp_obj_params = {
+ "prefix": "_frmqry_v",
+ "use_default_database": True,
+ "quote": False
+ }
+ __execute = UtilFuncs._create_view
+
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+ # If the user requests it, then we should create a
+ # volatile table instead of a view.
+ # Volatile table does not need to be added to the GC.
+ temp_obj_params["table_type"] = TeradataConstants.TERADATA_VOLATILE_TABLE
+ temp_obj_params["gc_on_quit"] = False
+ temp_obj_params["prefix"] = "_frmqry_vt"
+ __execute = UtilFuncs._create_table
+
+ elif materialize:
+ # If user requests to materialize the query, then we should create a
  # table instead of view and add the same in the GarbageCollector.
- temp_table_name = UtilFuncs._generate_temp_table_name(prefix="_frmqry_t", use_default_database=True,
- quote=False,
- table_type=TeradataConstants.TERADATA_TABLE)
- else:
- temp_table_name = UtilFuncs._generate_temp_table_name(prefix="_frmqry_v", use_default_database=True,
- quote=False)
+ temp_obj_params["table_type"] = TeradataConstants.TERADATA_TABLE
+ temp_obj_params["gc_on_quit"] = True
+ temp_obj_params["prefix"] = "_frmqry_t"
+ __execute = UtilFuncs._create_table
 
+ temp_table_name = UtilFuncs._generate_temp_table_name(**temp_obj_params)
  self._table_name = temp_table_name
+ __execute_params = (self._table_name, self._query)
+
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+ __execute_params = (self._table_name, self._query, True)
+
  try:
- if materialize:
- UtilFuncs._create_table(self._table_name, self._query)
- else:
- UtilFuncs._create_view(self._table_name, self._query)
+ __execute(*__execute_params)
  except OperationalError as oe:
  if "[Error 3707] Syntax error" in str(oe):
  raise ValueError(Messages.get_message(
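Based on the branching above, the object that backs DataFrame.from_query() now depends on configure.temp_object_type as well as materialize. A hedged sketch of how a caller might opt into volatile tables — the code compares against TeradataConstants.TERADATA_VOLATILE_TABLE, but the exact user-facing value accepted by configure (for example a string alias) is an assumption here:

>>> from teradataml import DataFrame, configure
>>> from teradataml.common.constants import TeradataConstants
>>> # Default: a view named "_frmqry_v..." backs the query (trailing ';' is now stripped).
>>> df_view = DataFrame.from_query("SELECT * FROM sales;")
>>> # materialize=True: a permanent "_frmqry_t..." table, registered with the GarbageCollector.
>>> df_tab = DataFrame.from_query("SELECT * FROM sales", materialize=True)
>>> # Assumed option value: a volatile "_frmqry_vt..." table that is not garbage collected.
>>> configure.temp_object_type = TeradataConstants.TERADATA_VOLATILE_TABLE
>>> df_vt = DataFrame.from_query("SELECT * FROM sales")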
@@ -245,6 +360,9 @@ class DataFrame():
  self.__data = None
  self.__data_columns = None
  self._alias = None
+ self._plot = None
+
+ self._eda_ui = None
 
  except TeradataMlException:
  raise
@@ -334,7 +452,9 @@ class DataFrame():
  _Validators._validate_function_arguments(arg_info_matrix)
  try:
  alias_df = self._from_node(self._nodeid, self._metaexpr, self._index_label,
- reuse_metaexpr=False)
+ reuse_metaexpr=False, _datalake=self._datalake,
+ _database=self._database, _table=self._table,
+ _otf=self._otf)
  # Assigning self attributes to newly created alias dataframe.
  alias_df._table_name = self._table_name
  alias_df._index = self._index
@@ -350,7 +470,8 @@ class DataFrame():
 
  @classmethod
  @collect_queryband(queryband="DF_fromTable")
- def from_table(cls, table_name, index=True, index_label=None):
+ def from_table(cls, table_name, index=True, index_label=None,
+ schema_name=None, datalake_name=None):
  """
  Class method for creating a DataFrame from a table or a view.
 
@@ -371,30 +492,48 @@ class DataFrame():
  Column/s used for sorting.
  Types: str
 
+ schema_name:
+ Optional Argument.
+ Specifies the schema where the table resides.
+ Types: str
+
+ datalake_name:
+ Optional Argument.
+ Specifies the datalake name.
+ Types: str
+
  EXAMPLES:
- from teradataml.dataframe.dataframe import DataFrame
+ >>> from teradataml.dataframe.dataframe import DataFrame
 
  # Example 1: The following example creates a DataFrame from a table or
  a view.
  # Load the example data.
- load_example_data("dataframe","sales")
+ >>> load_example_data("dataframe","sales")
 
  # Create DataFrame from table
- df = DataFrame.from_table('sales')
+ >>> df = DataFrame.from_table('sales')
 
  # Create DataFrame from table and without index column sorting.
- df = DataFrame.from_table("sales", False)
+ >>> df = DataFrame.from_table("sales", False)
 
  # Create DataFrame from table and sorting using the 'accounts'
  # column.
- df = DataFrame.from_table("sales", True, "accounts")
+ >>> df = DataFrame.from_table("sales", True, "accounts")
 
  # Example 2: The following example creates a DataFrame from existing Vantage
  # table "dbcinfo" in the non-default database "dbc" using the
  # in_schema() function.
 
- from teradataml.dataframe.dataframe import in_schema
- df = DataFrame.from_table(in_schema("dbc", "dbcinfo"))
+ >>> from teradataml.dataframe.dataframe import in_schema
+ >>> df = DataFrame.from_table(in_schema("dbc", "dbcinfo"))
+
+ # Example 3: Create a DataFrame on existing DataLake
+ # table "lake_table" in the "datalake_database" database
+ # in "datalake" datalake.
+
+ >>> datalake_df = DataFrame.from_table(table_name="lake_table",
+ ... schema_name="datalake_database",
+ ... datalake_name="datalake")
 
  RETURNS:
  DataFrame
@@ -403,6 +542,9 @@ class DataFrame():
  TeradataMlException - TDMLDF_CREATE_FAIL
 
  """
+ if schema_name:
+ return cls(in_schema(schema_name, table_name, datalake_name))
+
  return cls(table_name, index, index_label)
 
  @classmethod
@@ -462,7 +604,7 @@ class DataFrame():
  return cls(index=index, index_label=index_label, query=query, materialize=materialize)
 
  @classmethod
- def _from_node(cls, nodeid, metaexpr, index_label=None, undropped_index=None, reuse_metaexpr=True):
+ def _from_node(cls, nodeid, metaexpr, index_label=None, undropped_index=None, reuse_metaexpr=True, **kwargs):
  """
  Private class method for creating a DataFrame from a nodeid and parent metadata.
 
@@ -543,6 +685,11 @@ class DataFrame():
  in [col.name for col in df._metaexpr.c] for elem in undropped_index):
  df._undropped_index = undropped_index
 
+ # Populate remaining attributes.
+ for arg in kwargs:
+ # Pop each argument from kwargs and assign to new DataFrame.
+ arg_value = kwargs.get(arg)
+ df.__setattr__(arg, arg_value)
  return df
 
  def create_temp_view(self, name):
@@ -670,9 +817,10 @@ class DataFrame():
  return self
 
  @collect_queryband(queryband="DF_fillna")
- def fillna(self, value=None, columns=None, literal_value=False):
+ def fillna(self, value=None, columns=None, literal_value=False, partition_column=None):
  """
- Method to replace the null values in a column with the value specified.
+ DESCRIPTION:
+ Method to replace the null values in a column with the value specified.
 
  PARAMETERS:
  value:
@@ -705,6 +853,12 @@ class DataFrame():
  Default Value: False
  Types: bool
 
+ partition_column:
+ Optional Argument.
+ Specifies the column name to partition the data.
+ Default Value: None
+ Types: str
+
  RETURNS:
  teradataml DataFrame
 
@@ -745,6 +899,26 @@ class DataFrame():
  3 Blue Inc 90.0 50 95.0 101.0 17/01/04
  4 Alpha Co 210.0 200 215.0 250.0 17/01/04
  5 Orange Inc 210.0 50 NaN 250.0 17/01/04
+
+ # Example 3: Populate the null value in 'pclass' and
+ # 'fare' column with mean value with partition
+ # column as 'sex'.
+ # Load the example data.
+ >>> load_example_data("teradataml", ["titanic"])
+ >>> df = DataFrame.from_table("titanic")
+
+ >>> df.fillna(value="mean", columns=["pclass", "fare"], partition_column="sex")
+ passenger survived pclass name sex age sibsp parch ticket fare cabin embarked
+ 0 284 1 3 Dorking, Mr. Edward Arthur male 19.0 0 0 A/5. 10482 8.0500 None S
+ 1 589 0 3 Gilinski, Mr. Eliezer male 22.0 0 0 14973 8.0500 None S
+ 2 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 None Q
+ 3 282 0 3 Olsson, Mr. Nils Johan Goransson male 28.0 0 0 347464 7.8542 None S
+ 4 608 1 1 Daniel, Mr. Robert Williams male 27.0 0 0 113804 30.5000 None S
+ 5 404 0 3 Hakkarainen, Mr. Pekka Pietari male 28.0 1 0 STON/O2. 3101279 15.8500 None S
+ 6 427 1 2 Clarke, Mrs. Charles V (Ada Maria Winfield) female 28.0 1 0 2003 26.0000 None S
+ 7 141 0 3 Boulos, Mrs. Joseph (Sultana) female NaN 0 2 2678 15.2458 None C
+ 8 610 1 1 Shutes, Miss. Elizabeth W female 40.0 0 0 PC 17582 153.4625 C125 S
+ 9 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 0 P/PP 3381 24.0000 None C
  """
  from teradataml import SimpleImputeFit, SimpleImputeTransform
 
@@ -752,6 +926,7 @@ class DataFrame():
  arg_info_matrix.append(["value", value, True, (int, float, str, dict, list)])
  arg_info_matrix.append(["columns", columns, True, (list, str, tuple)])
  arg_info_matrix.append(["literal_value", literal_value, True, (bool)])
+ arg_info_matrix.append(["partition_column", partition_column, True, (str)])
 
  # Validate argument types
  _Validators._validate_function_arguments(arg_info_matrix)
@@ -823,9 +998,15 @@ class DataFrame():
  literals=literals,
  literals_columns=literals_columns,
  stats=stats,
- stats_columns=stats_columns)
+ stats_columns=stats_columns,
+ partition_column=partition_column)
 
- return fit_obj.transform(data=self).result
+ impute_transform = {
+ 'data': self,
+ 'data_partition_column': partition_column,
+ 'object_partition_column': partition_column}
+
+ return fit_obj.transform(**impute_transform).result
 
  def __execute_node_and_set_table_name(self, nodeid, metaexpr=None):
  """
@@ -924,6 +1105,7 @@ class DataFrame():
  self._column_names_and_types = []
  self._td_column_names_and_types = []
  self._td_column_names_and_sqlalchemy_types = {}
+ self._column_types = {}
 
  for col in self._metaexpr.c:
  if isinstance(col.type, sqlalchemy.sql.sqltypes.NullType):
@@ -931,9 +1113,11 @@ class DataFrame():
  else:
  tdtype = "{}".format(col.type)
 
- self._column_names_and_types.append((str(col.name), UtilFuncs._teradata_type_to_python_type(col.type)))
+ py_type = UtilFuncs._teradata_type_to_python_type(col.type)
+ self._column_names_and_types.append((str(col.name), py_type))
  self._td_column_names_and_types.append((str(col.name), tdtype))
  self._td_column_names_and_sqlalchemy_types[(str(col.name)).lower()] = col.type
+ self._column_types[(str(col.name)).lower()] = [py_type, col.type]
 
  def _get_metaexpr(self):
  """
@@ -952,7 +1136,24 @@ class DataFrame():
  meta = sqlalchemy.MetaData()
  db_schema = UtilFuncs._extract_db_name(self._table_name)
  db_table_name = UtilFuncs._extract_table_name(self._table_name)
- t = sqlalchemy.Table(db_table_name, meta, schema=db_schema, autoload_with=eng)
+ if not self._datalake:
+ t = sqlalchemy.Table(db_table_name, meta, schema=db_schema, autoload_with=eng)
+ return _MetaExpression(t)
+
+ # Get metaexpression for datalake table.
+ # Check existence of datalake table.
+ tdmlctx.get_connection().dialect.has_table(tdmlctx.get_connection(),
+ self._table,
+ schema=self._database,
+ table_only=True,
+ datalake=self._datalake)
+
+ # Extract column names and corresponding teradatasqlalchemy types.
+ col_names, col_types = df_utils._get_datalake_table_columns_info(self._database,
+ self._table,
+ self._datalake)
+ t = sqlalchemy.Table(self._table, meta, schema=self._database,
+ *(Column(col_name, col_type) for col_name, col_type in zip(col_names, col_types)))
  return _MetaExpression(t)
 
  def __getattr__(self, name):
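A minimal sketch of the idea behind the datalake branch above: when the table cannot be reflected with autoload_with, a sqlalchemy.Table is assembled from explicitly supplied Column objects. The column names and types here are made up for illustration:

>>> import sqlalchemy
>>> from sqlalchemy import Column
>>> from teradatasqlalchemy.types import INTEGER, VARCHAR
>>> meta = sqlalchemy.MetaData()
>>> col_names, col_types = ["id", "name"], [INTEGER(), VARCHAR(100)]
>>> t = sqlalchemy.Table("datalake_table", meta,
...                      *(Column(n, ty) for n, ty in zip(col_names, col_types)),
...                      schema="datalake_db")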
@@ -2729,8 +2930,8 @@ class DataFrame():
  raise TeradataMlException(msg, errcode)
 
  @collect_queryband(queryband="DF_describe")
- def describe(self, percentiles=[.25, .5, .75], include=None, verbose=False, distinct=False, statistics=None,
- columns=None):
+ def describe(self, percentiles=[.25, .5, .75], verbose=False, distinct=False, statistics=None,
+ columns=None, pivot=False):
  """
  DESCRIPTION:
  Generates statistics for numeric columns. This function can be used in two modes:
@@ -2759,18 +2960,6 @@ class DataFrame():
  Default Values: [.25, .5, .75], which returns the 25th, 50th, and 75th percentiles.
  Types: float or List of floats
 
- include:
- Optional Argument.
- Values can be either None or "all".
- If the value is "all", then both numeric and non-numeric columns are included.
- Computes count, mean, std, min, percentiles, and max for numeric columns.
- Computes count and unique for non-numeric columns.
- If the value is None, only numeric columns are used for collecting statistics.
- Note:
- Value 'all' is not applicable for 'Time Series Aggregate Mode'.
- Default Values: None
- Types: str
-
  verbose:
  Optional Argument.
  Specifies a boolean value to be used for time series aggregation, stating whether to get
@@ -2797,7 +2986,6 @@ class DataFrame():
  Computes count and unique for non-numeric columns.
  Notes:
  1. statistics is not applicable for 'Time Series Aggregate Mode'.
- 2. statistics should not be used with include as 'all'.
  Permitted Values: count, mean, min, max, unique, std, describe, percentile
  Default Values: None
  Types: str or List of str
@@ -2807,7 +2995,14 @@ class DataFrame():
  Specifies the name(s) of the columns we are collecting statistics for.
  Default Values: None
  Types: str or List of str
-
+
+ pivot:
+ Optional Argument.
+ Specifies a boolean value to pivot the output.
+ Note:
+ * "pivot" is not supported for PTI tables.
+ Default Values: False
+ Types: bool
 
  RETURNS:
  teradataml DataFrame
@@ -2829,7 +3024,7 @@ class DataFrame():
  Orange Inc 210.0 None None 250 04/01/2017
 
  # Computes count, mean, std, min, percentiles, and max for numeric columns.
- >>> df.describe()
+ >>> df.describe(pivot=True)
  Apr Feb Mar Jan
  func
  count 4 6 4 4
@@ -2841,8 +3036,45 @@ class DataFrame():
  75% 250 207.5 158.75 162.5
  max 250 210 215 200
 
+ # Computes count, mean, std, min, percentiles, and max for numeric columns with
+ # default arguments.
+ >>> df.describe()
+ ATTRIBUTE StatName StatValue
+ Jan MAXIMUM 200.0
+ Jan STANDARD DEVIATION 62.91528696058958
+ Jan PERCENTILES(25) 125.0
+ Jan PERCENTILES(50) 150.0
+ Mar COUNT 4.0
+ Mar MINIMUM 95.0
+ Mar MAXIMUM 215.0
+ Mar MEAN 147.5
+ Mar STANDARD DEVIATION 49.749371855331
+ Mar PERCENTILES(25) 128.75
+ Mar PERCENTILES(50) 140.0
+ Apr COUNT 4.0
+ Apr MINIMUM 101.0
+ Apr MAXIMUM 250.0
+ Apr MEAN 195.25
+ Apr STANDARD DEVIATION 70.97123830585646
+ Apr PERCENTILES(25) 160.25
+ Apr PERCENTILES(50) 215.0
+ Apr PERCENTILES(75) 250.0
+ Feb COUNT 6.0
+ Feb MINIMUM 90.0
+ Feb MAXIMUM 210.0
+ Feb MEAN 166.66666666666666
+ Feb STANDARD DEVIATION 59.553897157672786
+ Feb PERCENTILES(25) 117.5
+ Feb PERCENTILES(50) 200.0
+ Feb PERCENTILES(75) 207.5
+ Mar PERCENTILES(75) 158.75
+ Jan PERCENTILES(75) 162.5
+ Jan MEAN 137.5
+ Jan MINIMUM 50.0
+ Jan COUNT 4.0
+
  # Computes count, mean, std, min, percentiles, and max for numeric columns with 30th and 60th percentiles.
- >>> df.describe(percentiles=[.3, .6])
+ >>> df.describe(percentiles=[.3, .6], pivot=True)
  Apr Feb Mar Jan
  func
  count 4 6 4 4
@@ -2855,7 +3087,7 @@ class DataFrame():
 
  # Computes count, mean, std, min, percentiles, and max for numeric columns group by "datetime" and "Feb".
  >>> df1 = df.groupby(["datetime", "Feb"])
- >>> df1.describe()
+ >>> df1.describe(pivot=True)
  Jan Mar Apr
  datetime Feb func
  04/01/2017 90.0 25% 50 95 101
@@ -2883,22 +3115,6 @@ class DataFrame():
  min 200 215 250
  std None None 0
 
- # Computes count, mean, std, min, percentiles, and max for numeric columns and
- # computes count and unique for non-numeric columns
- >>> df.describe(include="all")
- accounts Feb Jan Mar Apr datetime
- func
- 25% None 117.5 125 128.75 160.25 None
- 75% None 207.5 162.5 158.75 250 None
- count 6 6 4 4 4 6
- mean None 166.667 137.5 147.5 195.25 None
- max None 210 200 215 250 None
- min None 90 50 95 101 None
- 50% None 200 150 140 215 None
- std None 59.554 62.915 49.749 70.971 None
- unique 6 None None None None 1
-
- #
  # Examples for describe() function as Time Series Aggregate.
  #
  >>> # Load the example datasets.
@@ -3081,15 +3297,15 @@ class DataFrame():
  >>>
  """
 
- # Argument validations
+ # -------------Argument validations---------------#
  awu_matrix = []
  awu_matrix.append(["columns", columns, True, (str, list), True])
  awu_matrix.append(["percentiles", percentiles, True, (float, list)])
- awu_matrix.append(["include", include, True, (str), True, [None, "all"]])
  awu_matrix.append(["verbose", verbose, True, (bool)])
  awu_matrix.append(["distinct", distinct, True, (bool)])
  awu_matrix.append(["statistics", statistics, True, (str, list), True,
  ["count", "mean", "min", "max", "unique", "std", "describe", "percentile"]])
+ awu_matrix.append(["pivot", pivot, True, (bool)])
 
  # Validate argument types
  _Validators._validate_function_arguments(awu_matrix)
@@ -3108,22 +3324,11 @@ class DataFrame():
  if statistics:
  statistics = [stats.lower() for stats in UtilFuncs._as_list(statistics)]
 
- # Argument include and statistics should not be used together
- if include is not None and statistics is not None:
- raise ValueError(Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH).format(
- 'include', 'statistics'
- ))
-
  # Percentiles must be a list of values between 0 and 1.
  if not isinstance(percentiles, list) or not all(p > 0 and p < 1 for p in percentiles):
  raise ValueError(Messages.get_message(MessageCodes.INVALID_ARG_VALUE, percentiles, "percentiles",
  "percentiles must be a list of values between 0 and 1"))
 
- # Argument 'include' with value 'all' is not allowed for DataFrameGroupByTime
- if include is not None and include.lower() == "all" and isinstance(self, DataFrameGroupByTime):
- raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
- 'include', 'Aggregation', 'all', 'describe()', 'DataFrame or DataFrameGroupBy'))
-
  # Argument 'statistics' is not allowed for DataFrameGroupByTime
  if statistics is not None and isinstance(self, DataFrameGroupByTime):
  raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
@@ -3133,26 +3338,31 @@ class DataFrame():
  if verbose and not isinstance(self, DataFrameGroupByTime):
  raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
  'verbose', 'Aggregation', 'True', 'describe()', 'DataFrameGroupByTime'))
+ # -------------End of argument validations---------------#
 
  function_label = "func"
+ sort_cols = []
  try:
  self.__execute_node_and_set_table_name(self._nodeid)
 
  groupby_column_list = None
- if isinstance(self, DataFrameGroupBy):
+ if isinstance(self, DataFrameGroupByTime) or isinstance(self, DataFrameGroupBy):
  groupby_column_list = self.groupby_column_list
- df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
- groupby_column_list=groupby_column_list)
+ if columns:
+ df_utils._validate_describe_columns(columns=columns, metaexpr=self._metaexpr,
+ groupby_column_list=groupby_column_list)
+ sort_cols = list(groupby_column_list)
 
- if isinstance(self, DataFrameGroupByTime):
- groupby_column_list = self.groupby_column_list
- df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
- groupby_column_list=groupby_column_list)
+ # 'func' column will be always there in result.
+ sort_cols.append(function_label)
 
+ # Handle DataFrameGroupByTime using union all approach and
+ # other DataFrames using TD_UnivariateStatistics approach.
+ if isinstance(self, DataFrameGroupByTime):
  # Construct the aggregate query.
  agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
  percentiles=percentiles, function_label=function_label,
- groupby_column_list=groupby_column_list, include=include,
+ groupby_column_list=groupby_column_list, include=None,
  is_time_series_aggregate=True, verbose=verbose,
  distinct=distinct,
  timebucket_duration=self._timebucket_duration,
@@ -3160,29 +3370,99 @@ class DataFrame():
  timecode_column=self._timecode_column,
  sequence_column=self._sequence_column,
  fill=self._fill)
+
+ if groupby_column_list is not None:
+ df = DataFrame.from_query(agg_query, index_label=sort_cols)
+ df2 = df.sort(sort_cols)
+ df2._metaexpr._n_rows = 100
+ describe_df = df2
+ else:
+ describe_df = DataFrame.from_query(agg_query, index_label=function_label)
+
+ # Check if numeric overflow can occur for result DataFrame.
+ if self._check_numeric_overflow(describe_df):
+ result_df = self._promote_dataframe_types()
+ describe_df = result_df.describe(pivot=True)
+ return describe_df
+
  else:
- # Construct the aggregate query.
- agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
- percentiles=percentiles, function_label=function_label,
- groupby_column_list=groupby_column_list, include=include,
- is_time_series_aggregate=False, verbose=verbose,
- distinct=distinct, statistics=statistics)
-
- if groupby_column_list is not None:
- sort_cols = [i for i in groupby_column_list]
- sort_cols.append(function_label)
- df = DataFrame.from_query(agg_query, index_label=sort_cols)
- df2 = df.sort(sort_cols)
- df2._metaexpr._n_rows = 100
- describe_df = df2
- else:
- describe_df = DataFrame.from_query(agg_query, index_label=function_label)
+ # If pivot is True, then construct the aggregate query and return the result DataFrame.
+ # Otherwise, return the result DataFrame in the regular aggregate mode using UnivariateStatistics.
+
+ if pivot:
+ # Construct the aggregate query.
+ agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
+ percentiles=percentiles, function_label=function_label,
+ groupby_column_list=groupby_column_list, include=None,
+ is_time_series_aggregate=False, verbose=verbose,
+ distinct=distinct, statistics=statistics)
+
+ if groupby_column_list is not None:
+ sort_cols = [i for i in groupby_column_list]
+ sort_cols.append(function_label)
+ df = DataFrame.from_query(agg_query, index_label=sort_cols)
+ df2 = df.sort(sort_cols)
+ df2._metaexpr._n_rows = 100
+ describe_df = df2
+ else:
+ describe_df = DataFrame.from_query(agg_query, index_label=function_label)
+
+ # Check if numeric overflow can occur for result DataFrame.
+ if self._check_numeric_overflow(describe_df):
+ result_df = self._promote_dataframe_types()
+ describe_df = result_df.describe(pivot=True)
+
+ return describe_df
+
+ # If columns is None, then all dataframe columns are considered.
+ if columns is None:
+ columns = self.columns
+ # Exclude groupby columns
+ if groupby_column_list is not None:
+ columns = [col for col in columns if col not in groupby_column_list]
+
+ numeric_cols = []
+
+ # Extract numeric columns and their types of all columns
+ for col in self._metaexpr.c:
+ if type(col.type) in UtilFuncs()._get_numeric_datatypes() and \
+ col.name in columns:
+ numeric_cols.append(col.name)
+
+ if numeric_cols:
+ # Default statistics for 'Regular Aggregate Mode'
+ sql_stat = ["COUNT", "MAXIMUM", "MEAN", "MINIMUM", "PERCENTILES", "STANDARD DEVIATION"]
+
+ if statistics is not None:
+ py_to_sql_func_map = {"count": "COUNT",
+ "max": "MAXIMUM",
+ "mean": "MEAN",
+ "unique": 'UNIQUE ENTITY COUNT',
+ "min": "MINIMUM",
+ "percentile": "PERCENTILES",
+ "std": "STANDARD DEVIATION"}
+ # Convert statistics into corresponding SQL function names
+ sql_stat = [py_to_sql_func_map[stat] for stat in UtilFuncs()._as_list(statistics)]
+
+ # Convert percentiles to centiles for univariate statistics
+ centiles = list(map(lambda n: int(n * 100), percentiles))
+
+ # UnivariateStatistics parameters
+ univar_param = {
+ "newdata": self.select(self.columns),
+ "target_columns": numeric_cols,
+ "partition_columns": groupby_column_list,
+ "centiles": centiles,
+ "stats": sql_stat
+ }
+
+ from teradataml import UnivariateStatistics
+ # Run UnivariateStatistics
+ aggr_df = UnivariateStatistics(**univar_param).result
+
+ # Return the result in teradataml format
+ return aggr_df
 
- # Check if numeric overflow can occur for result DataFrame.
- if self._check_numeric_overflow(describe_df):
- result_df = self._promote_dataframe_types()
- describe_df = result_df.describe()
- return describe_df
  except TeradataMlException:
  raise
  except Exception as err:
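For the non-pivot path, a sketch of the UnivariateStatistics call that describe() now delegates to, mirroring the univar_param dictionary above (column names are illustrative; percentiles are converted to whole-number centiles):

>>> from teradataml import UnivariateStatistics
>>> stats_df = UnivariateStatistics(newdata=df.select(df.columns),
...                                 target_columns=["Feb", "Jan", "Mar", "Apr"],
...                                 partition_columns=None,
...                                 centiles=[25, 50, 75],
...                                 stats=["COUNT", "MAXIMUM", "MEAN", "MINIMUM",
...                                        "PERCENTILES", "STANDARD DEVIATION"]).result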
@@ -5269,8 +5549,10 @@ class DataFrame():
  Specifies the function(s) to apply on DataFrame columns.
 
  Valid values for func are:
- 'count', 'sum', 'min', 'max', 'mean', 'std', 'percentile', 'unique',
- 'median', 'var'
+ * 'count', 'sum', 'min', 'max', 'mean', 'std', 'percentile', 'percentile_<floatvalue>', 'unique',
+ 'median', 'var'
+ * Note: In 'percentile_<floatvalue>', <floatvalue> specifies the desired percentile value to
+ calculate aggregate. It should be in the range of 0.0 to 1.0 (both inclusive).
 
  Acceptable formats for function(s) are
  string, dictionary, list of strings/functions/ColumnExpression or ColumnExpression.
@@ -5304,12 +5586,17 @@ class DataFrame():
  Output column names after the above operation are:
  min_employee_no, sum_employee_no, var_employee_no, min_first_name
 
- 4. "func" passed as a ColumnExpression built using the aggregate functions.
+ 4. "percentile_<floatvalue>" passed to agg.
+ >>> df.agg({'employee_no' : ['percentile_0.25', 'percentile_0.75', 'min']})
+ >>> df.agg(['percentile_0.25', 'percentile_0.75', 'sum'])
+ >>> df.agg('percentile_0.25')
+
+ 5. "func" passed as a ColumnExpression built using the aggregate functions.
  >>> df.agg(df.first_name.count())
  Output column name after the above operation is:
  count(first_name)
 
- 5. "func" passed as a list of ColumnExpression built using the aggregate functions.
+ 6. "func" passed as a list of ColumnExpression built using the aggregate functions.
  >>> df.agg([df.employee_no.min(), df.first_name.count()])
  Output column names after the above operation are:
  min(employee_no), count(first_name)
@@ -5397,6 +5684,12 @@ class DataFrame():
  min_employee_no sum_employee_no var_employee_no min_first_name
  0 100 313 44.333333 abcd
 
+ # Get the minimum, 25 percentile value and variance of employee number, by passing dictionary of
+ # column names to string function/list of string functions as parameter.
+ >>> df.agg({'employee_no' : ['min', 'percentile_0.25', 'var']})
+ min_employee_no percentile_0.25_employee_no var_employee_no
+ 0 100 100 44.333333
+
  # Get the minimum and sum of all the columns in the dataframe,
  # by passing list of string functions as parameter.
  >>> df.agg(['min', 'sum'])
@@ -5442,9 +5735,15 @@ class DataFrame():
  mean_employee_no unique_employee_no unique_first_name mean_joined_date unique_joined_date
  0 104.333333 3 2 60/12/04 2
 
+ # Get the percentile of each column in the dataframe with default value 0.5.
  >>> df.agg('percentile')
- percentile_employee_no percentile_marks
- 0 101 None
+ percentile_employee_no percentile_marks
+ 0 101 None
+
+ # Get 80 percentile of each column in the dataframe.
+ >>> df.agg('percentile_0.8')
+ percentile_0.8_employee_no percentile_0.8_marks
+ 0 107 None
 
  # Using another table 'sales' (having repeated values) to demonstrate operations
  # 'unique' and 'percentile'.
@@ -5461,9 +5760,11 @@ class DataFrame():
  Blue Inc 90.0 50 95 101 2017-04-01
  Red Inc 200.0 150 140 None 2017-04-01
 
- >>> df.agg('percentile')
- percentile_Feb percentile_Jan percentile_Mar percentile_Apr
- 0 200.0 150 140 215
+ # Get 80 and 40 percentile values of each column in the dataframe.
+ >>> df1 = df.select(['Feb', 'Jan', 'Mar', 'Apr'])
+ >>> df1.agg(['percentile_0.8', 'percentile_0.4'])
+ percentile_0.8_Feb percentile_0.4_Feb percentile_0.8_Jan percentile_0.4_Jan percentile_0.8_Mar percentile_0.4_Mar percentile_0.8_Apr percentile_0.4_Apr
+ 0 210.0 200.0 170 150 170 140 250 194
 
  >>> df.agg('unique')
  unique_accounts unique_Feb unique_Jan unique_Mar unique_Apr unique_datetime
@@ -5650,6 +5951,8 @@ class DataFrame():
 
  except TeradataMlException:
  raise
+ except ValueError:
+ raise
  except Exception as err:
  raise TeradataMlException(Messages.get_message(
  MessageCodes.EXECUTION_FAILED, "perform {} on DataFrame".format(operation), str(err)),
@@ -5765,7 +6068,35 @@ class DataFrame():
 
  def _repr_html_(self):
  """ Print method for teradataml for iPython rich display. """
+ self._generate_output_html()
+ if display.enable_ui:
+ # EDA Ui widget representation using teradatamlwidgets
+ if self._eda_ui is None:
+ from teradatamlwidgets.eda.Ui import Ui
+ self._eda_ui = Ui(df=self, html=self.html)
+ else:
+ self._eda_ui.display_ui()
+ return self.html
+
+ def get_eda_ui(self):
+ """
+ Returns the EDA representation UI.
 
+ PARAMETERS:
+ None.
+
+ EXCEPTIONS:
+ None.
+
+ RETURNS:
+ teradatamlwidgets.eda.Ui
+
+ EXAMPLE:
+ ui = df.get_eda_ui()
+ """
+ return self._eda_ui
+
+ def _generate_output_html(self, disable_types=True):
  # Check if class attributes __data and __data_columns are not None.
  # If not None, reuse the data and columns.
  # If None, generate latest results.
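A short, hedged usage sketch of the new EDA hooks shown above, assuming a notebook session with display.enable_ui turned on and the teradatamlwidgets package installed:

>>> from teradataml import DataFrame
>>> from teradataml.options.display import display
>>> display.enable_ui = True             # turn on the widget-based EDA representation
>>> df = DataFrame("titanic")
>>> df                                   # _repr_html_ now builds/refreshes the EDA UI
>>> ui = df.get_eda_ui()                 # handle to the teradatamlwidgets Ui object
>>> out = df.get_output(output_index=0)  # result of a function run from the 'Analyze' tab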
@@ -5778,17 +6109,25 @@ class DataFrame():
  dindent = indent + indent
 
  header_html = ['<style type="text/css">',
- 'table {border:ridge 5px;}',
+ 'table { border:ridge 5px}',
  'table td {border:inset 1px;}',
- 'table tr#HeaderRow {background-color:grey; color:white;}'
+ 'table tr#HeaderRow {background-color:grey; color:white;}',
  '</style>\n'
  ]
  html = "\n{0}".format(indent).join(header_html)
- html += '<html><table>\n{0}<tr id="HeaderRow">\n'.format(indent)
+ html += '<html><table style="min-width:1000px;">\n{0}<tr id="HeaderRow">\n'.format(indent)
 
- columns_html = "</th>\n{0}<th>".format(dindent).join(self.__data_columns)
- html += "{0}<th>{1}</th>\n".format(dindent, columns_html)
- html += "{0}</tr>\n".format(indent)
+ columns_html = "</th><th>".join(self.__data_columns)
+ html += "<th>{0}</th>\n".format(columns_html)
+ html += "</tr>\n"
+
+ if not disable_types:
+ html += '<tr>\n'.format(indent)
+ col_types = [repr(self._td_column_names_and_sqlalchemy_types[column]) for column in
+ self.__data_columns]
+ columns_types_html = "</td>\n{0}<td>".format(dindent).join(col_types)
+ html += "{0}<td>{1}</td>\n".format(dindent, columns_types_html)
+ html += "{0}</tr>\n".format(indent)
 
  for row in self.__data:
  row_html = ["{0}<td>{1}</td>\n".format(dindent,
@@ -5796,8 +6135,31 @@ class DataFrame():
  html += "{1}<tr>\n{0}{1}</tr>\n".format("".join(row_html), indent)
 
  html += "</table></html>"
+ self.html = html
 
- return html
+ def get_output(self, output_index=0):
+ """
+ DESCRIPTION:
+ Returns the result of analytic function when analytic function is
+ run from 'Analyze' tab in EDA UI.
+ Note:
+ * The function does not return anything if analytic function is
+ not run from EDA UI.
+
+ PARAMETERS:
+ output_index:
+ Optional Argument.
+ Specifies the index of the output dataframe to be returned.
+ Default Value: 0
+ Types: int
+
+ RAISES:
+ IndexError
+
+ RETURNS:
+ teradataml DataFrame object.
+ """
+ return self._eda_ui.get_output_dataframe(output_index=output_index)
 
  def __get_data_columns(self):
  """
@@ -6857,7 +7219,8 @@ class DataFrame():
  compiled_condition = condition.compile(compile_kwargs={'include_table': True,
  'literal_binds': True,
  'table_name_kind': '_join_alias',
- 'compile_with_caller_table': True})
+ 'compile_with_caller_table': True,
+ 'table_only': True})
 
  all_join_conditions.append(compiled_condition)
 
@@ -7399,7 +7762,7 @@ class DataFrame():
  """
  return (type(None), int, float, str, decimal.Decimal, ColumnExpression, ClauseElement)
 
- def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
+ def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, node_id, **kwargs):
  """
  DESCRIPTION:
  Function generates the MetaExpression and AED nodeid for DataFrame.assign()
@@ -7412,6 +7775,11 @@ class DataFrame():
  Default Value: False
  Types: bool
 
+ node_id:
+ Optional Argument.
+ Specifies the input nodeid for the assign operation.
+ Types: str
+
  kwargs:
  keyword, value pairs
  - keywords are the column names.
@@ -7439,7 +7807,7 @@ class DataFrame():
 
  # Join the expressions in result.
  assign_expression = ', '.join(list(map(lambda x: x[1], result)))
- new_nodeid = self._aed_utils._aed_assign(self._nodeid,
+ new_nodeid = self._aed_utils._aed_assign(node_id,
  assign_expression,
  AEDConstants.AED_ASSIGN_DROP_EXISITING_COLUMNS.value)
 
@@ -7571,14 +7939,14 @@ class DataFrame():
  _Validators._check_auth_token("udf")
  for colname, col in udf_expr.items():
  env_name = UtilFuncs._get_env_name(col)
- # Store the env_name and its corresponding output column
+ # Store the env_name and its corresponding output column
  if env_name in env_mapper:
  env_mapper[env_name].append(colname)
  else:
  env_mapper[env_name] = [colname]
  else:
  env_mapper[env_name] = udf_expr.keys()
-
+ debug = False
  for env_name, cols in env_mapper.items():
  # Create a dictionary of output columns to column type.
  returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
@@ -7589,6 +7957,7 @@ class DataFrame():
  # Create a dictionary of output column name to udf arguments
  function_args = {}
  for colname, col in udf_expr.items():
+ debug |= col._debug
  delimiter = col._delimiter
  quotechar = col._quotechar
  if colname in cols:
@@ -7621,15 +7990,17 @@ class DataFrame():
  columns_definitions=columns_definitions,
  output_type_converters={
  col_name: _Dtypes._teradata_type_to_python_type(col_type)
- for col_name, col_type in returns.items()})
+ for col_name, col_type in returns.items()},
+ debug=debug
+ )
 
  df = tbl_operators.execute()
  return df
-
+
  def _assign_call_udf(self, call_udf_expr):
  """
  DESCRIPTION:
- Internal function for DataFrame.assign() to execute the call_udf using
+ Internal function for DataFrame.assign() to execute the call_udf using
  Script/Apply Table Operator and create new column for teradataml DataFrame.
 
  PARAMETER:
@@ -7656,7 +8027,7 @@ class DataFrame():
  # Create a dictionary of output columns to column type (python types).
  output_type_converters = {col_name: _Dtypes._teradata_type_to_python_type(col_type) \
  for col_name, col_type in returns.items()}
-
+
  for colname, col in call_udf_expr.items():
  returns[colname] = col.type
  output_type_converters[colname] = _Dtypes._teradata_type_to_python_type(col.type)
@@ -7782,7 +8153,7 @@ class DataFrame():
  Look at Example 18 to understand more.
  8. While passing multiple udf expressions, one can not pass one column output
  as another column input in the same ``assign`` call.
- 9. If user pass multiple udf expressions, delimiter and quotechar specified in
+ 9. If user pass multiple udf expressions, delimiter and quotechar specified in
  last udf expression are considered for processing.
 
  RAISES:
@@ -8147,13 +8518,13 @@ class DataFrame():
  Red Inc 200.0 150.0 140.0 NaN 17/01/04 201.0 abc RED INC 207
  >>>
 
- # Example 19: Convert the values is 'accounts' column to upper case using a user
+ # Example 19: Convert the values is 'accounts' column to upper case using a user
  # defined function on Vantage Cloud Lake.
  # Create a Python 3.10.5 environment with given name and description in Vantage.
  >>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
  User environment 'test_udf' created.
  >>>
- # Create a user defined functions to 'to_upper' to get the values in upper case
+ # Create a user defined functions to 'to_upper' to get the values in upper case
  # and pass the user env to run it on.
  >>> from teradataml.dataframe.functions import udf
  >>> @udf(env_name = env)
@@ -8165,7 +8536,7 @@ class DataFrame():
  # to the DataFrame.
  >>> df.assign(upper_stats = to_upper('accounts'))
  Feb Jan Mar Apr datetime upper_stats
- accounts
+ accounts
  Alpha Co 210.0 200.0 215.0 250.0 17/01/04 ALPHA CO
  Blue Inc 90.0 50.0 95.0 101.0 17/01/04 BLUE INC
  Yellow Inc 90.0 NaN NaN NaN 17/01/04 YELLOW INC
@@ -8184,12 +8555,12 @@ class DataFrame():
  # Register the created user defined function with name "upper".
  >>> register("upper", to_upper)
  >>>
- # Call the user defined function registered with name "upper" and assign the
+ # Call the user defined function registered with name "upper" and assign the
  # ColumnExpression returned to the DataFrame.
  >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
  >>> res
  Feb Jan Mar Apr datetime upper_col
- accounts
+ accounts
  Alpha Co 210.0 200.0 215.0 250.0 17/01/04 ALPHA CO
  Blue Inc 90.0 50.0 95.0 101.0 17/01/04 BLUE INC
  Yellow Inc 90.0 NaN NaN NaN 17/01/04 YELLOW INC
@@ -8263,8 +8634,34 @@ class DataFrame():
  # from udf expression.
  if bool(regular_expr):
  try:
- (new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(drop_columns, **regular_expr)
+ root_node_id = None
+ root_df_col = df.columns
+
+ # Get the previous node type, if it is assign and drop_columns is False,
+ # then check if the previous assign arguments exists and are not present
+ # in either the root dataframe columns or the current assign arguments.
+ # if these conditions are met, obtain the root node id (i.e., the first
+ # node of the assign operation) and merge the previous assign arguments with the current ones.
+
+ prev_node_type = df._aed_utils._aed_get_node_query_type(df._nodeid)
+ if not drop_columns and prev_node_type == "assign" and df._previous_assign_args is not None:
+ if not df._root_columns & df._previous_assign_args.keys() and \
+ not df._previous_assign_args.keys() & regular_expr.keys():
+ # Get the root node id and root dataframe columns.
+ root_df_col = df._root_columns
+ root_node_id = df._aed_utils._aed_get_parent_nodeids(df._nodeid)[0]
+ regular_expr = {**df._previous_assign_args, **regular_expr}
+
+ # If root_node_id is None, assign the current node id as root node of assign operation
+ node_id = root_node_id if root_node_id is not None else df._nodeid
+
+ # Generate new meta expression and node id for the new dataframe.
+ (new_meta, new_nodeid) = df._generate_assign_metaexpr_aed_nodeid(
+ drop_columns, node_id = node_id, **regular_expr)
  df = df._create_dataframe_from_node(new_nodeid, new_meta, df._index_label)
+ df._previous_assign_args = regular_expr
+ df._root_columns = root_df_col
+
  except Exception as err:
  errcode = MessageCodes.TDMLDF_INFO_ERROR
  msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
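The hunk above lets consecutive assign() calls collapse into a single AED node when the previous assign's outputs neither shadow the root DataFrame's columns nor feed the new expressions. A rough, standalone illustration of that key-overlap check using plain dicts (names are illustrative, not the teradataml internals):

    # Outputs produced by the previous assign and the ones being added now.
    previous_args = {"upper_col": "expr_1"}
    new_args = {"lower_col": "expr_2"}
    root_columns = {"accounts", "Feb", "Jan"}

    # Merge only when the previous outputs do not overlap the root columns
    # and do not overlap the new output names; then both sets of expressions
    # can be applied against the original (root) node in one step.
    if not (root_columns & previous_args.keys()) and \
            not (previous_args.keys() & new_args.keys()):
        merged = {**previous_args, **new_args}
    else:
        merged = new_args  # keep the assigns as separate nodes
    print(merged)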
@@ -8475,7 +8872,9 @@ class DataFrame():
  _Validators._validate_column_exists_in_dataframe(keys, self._metaexpr)
 
  try:
- new_index_list = self._index_label if self._index_label is not None else []
+
+ # Slicing creates a new list instance with the same contents.
+ new_index_list = self._index_label[:] if self._index_label is not None else []
 
  # Creating a list with requested index labels bases on append
  if append:
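The `[:]` slice in the hunk above matters because binding the attribute directly would alias the DataFrame's own index-label list, so later appends would mutate it in place. A quick plain-Python illustration:

    index_label = ["id"]
    alias = index_label        # same object: appending via alias also changes index_label
    copy = index_label[:]      # new list with the same contents
    copy.append("ts")
    print(index_label)                                # ['id'] - unchanged
    print(alias is index_label, copy is index_label)  # True False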
@@ -8490,7 +8889,7 @@ class DataFrame():
  new_index_list = keys
 
  # Takes care of appending already existing index
- new_index_list = list(set(new_index_list))
+ new_index_list = list(dict.fromkeys(new_index_list))
 
  # In case requested index is same as existing index, return same DF
  if new_index_list == self._index_label:
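The switch from set() to dict.fromkeys() above still removes duplicates but keeps the order in which the index columns were requested, which the subsequent equality check against the existing index relies on. For example:

    keys = ["buoyid", "salinity", "buoyid"]
    print(list(set(keys)))            # order is arbitrary, e.g. ['salinity', 'buoyid']
    print(list(dict.fromkeys(keys)))  # ['buoyid', 'salinity'] - first-seen order preserved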
@@ -9373,15 +9772,15 @@ class DataFrame():
  TypeError, ValueError, TeradataMLException
 
  EXAMPLES:
- >>> # Load the example datasets.
- ... load_example_data("dataframe", ["ocean_buoys", "ocean_buoys_nonpti"])
+ # Load the example datasets.
+ >>> load_example_data("dataframe", ["ocean_buoys", "ocean_buoys_nonpti"])
  >>>
 
- >>> # Create the required DataFrames.
- ... # DataFrame on non-sequenced PTI table
- ... ocean_buoys = DataFrame("ocean_buoys")
- >>> # Check DataFrame columns and let's peek at the data
- ... ocean_buoys.columns
+ # Create the required DataFrames.
+ # DataFrame on non-sequenced PTI table
+ >>> ocean_buoys = DataFrame("ocean_buoys")
+ # Check DataFrame columns and let's peek at the data
+ >>> ocean_buoys.columns
  ['buoyid', 'TD_TIMECODE', 'temperature', 'salinity']
  >>> ocean_buoys.head()
  TD_TIMECODE temperature salinity
@@ -9397,10 +9796,10 @@ class DataFrame():
  0 2014-01-06 08:00:00.000000 10.0 55
  0 2014-01-06 08:10:00.000000 10.0 55
 
- >>> # DataFrame on NON-PTI table
- ... ocean_buoys_nonpti = DataFrame("ocean_buoys_nonpti")
- >>> # Check DataFrame columns and let's peek at the data
- ... ocean_buoys_nonpti.columns
+ # DataFrame on NON-PTI table
+ >>> ocean_buoys_nonpti = DataFrame("ocean_buoys_nonpti")
+ # Check DataFrame columns and let's peek at the data
+ >>> ocean_buoys_nonpti.columns
  ['buoyid', 'timecode', 'temperature', 'salinity']
  >>> ocean_buoys_nonpti.head()
  buoyid temperature salinity
@@ -9974,6 +10373,15 @@ class DataFrame():
  # If user did not pass any arguments which form join conditions,
  # Merge is performed using index columns of TeradataML DataFrames
  if on is None and left_on is None and right_on is None and not use_index:
+ # DataFrames created on OTF table will not have index.
+ if self._datalake is not None or right._datalake is not None:
+ msg_code = MessageCodes.EXECUTION_FAILED
+ emsg = "Either 'on' argument or both 'left_on' and 'right_on' arguments" \
+ " must be provided to merge DataFrames when they are created on" \
+ " OTF table(s)."
+ error_msg = Messages.get_message(msg_code, "merge dataframes", emsg)
+ raise TeradataMlException(error_msg, msg_code)
+
  if self._index_label is None or right._index_label is None:
  raise TeradataMlException(
  Messages.get_message(MessageCodes.TDMLDF_INDEXES_ARE_NONE), MessageCodes.TDMLDF_INDEXES_ARE_NONE)
@@ -9981,6 +10389,12 @@ class DataFrame():
  use_index = True
 
  if use_index:
+ if self._datalake is not None or right._datalake is not None:
+ msg_code = MessageCodes.EXECUTION_FAILED
+ emsg = "Can not use Index to merge DataFrames when they are created on OTF table(s)."
+ error_msg = Messages.get_message(msg_code, "merge dataframes", emsg)
+ raise TeradataMlException(error_msg, msg_code)
+
  if self._index_label is None or right._index_label is None:
  raise TeradataMlException(
  Messages.get_message(MessageCodes.TDMLDF_INDEXES_ARE_NONE), MessageCodes.TDMLDF_INDEXES_ARE_NONE)
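Together, the two hunks above mean that DataFrames backed by OTF (datalake) tables have no index to fall back on, so merge() needs explicit join columns. A hedged usage sketch (the DataFrames and the column name are illustrative; assume otf_df was created on an OTF table and other_df on a regular table):

    >>> # Explicit join columns work as usual.
    >>> otf_df.merge(right=other_df, on="store_id", how="inner")
    >>> # Falling back to the index (implicitly, or via use_index=True) now raises
    >>> # a TeradataMlException for OTF-backed DataFrames, per the checks above.
    >>> otf_df.merge(right=other_df, use_index=True)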
@@ -10636,7 +11050,7 @@ class DataFrame():
  2. seed is supported for stratify column.
  3. Arguments "stratify_column", "seed", "id_column" are supported only
  for stratifying the data.
- Types: str
+ Types: str OR Feature
 
  seed:
  Optional Argument.
@@ -10662,7 +11076,7 @@ class DataFrame():
  for stratifying the data.
  2. "id_column" is supported only when "stratify_column" is used.
  Ignored otherwise.
- Types: str
+ Types: str OR Feature
 
  RETURNS:
  teradataml DataFrame
@@ -11191,6 +11605,10 @@ class DataFrame():
  DESCRIPTION:
  Function to apply a user defined function to each row in the
  teradataml DataFrame, leveraging Vantage's Script Table Operator.
+ Notes:
+ 1. The function requires the same Python version in both the Vantage and the local environment.
+ 2. Teradata recommends using the "dill" package with the same version in both the Vantage and
+ the local environment.
 
  PARAMETERS:
  user_function:
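A small, self-contained way to check the local side of the version parity that the notes above ask for; the corresponding Python and dill versions installed in Vantage would be compared against these values (plain Python, not teradataml API):

    import sys
    import dill

    # Local interpreter and dill versions to compare with the Vantage environment
    # before calling map_row()/map_partition().
    print("python:", ".".join(map(str, sys.version_info[:3])))
    print("dill  :", dill.__version__)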
@@ -11371,6 +11789,15 @@ class DataFrame():
  Default Value: True
  Types: bool
 
+ debug:
+ Optional Argument.
+ Specifies whether to display the path of the script file generated during function execution.
+ This argument helps in debugging failures during function execution. When set to True,
+ the function displays the path of the script and does not remove the file from the local
+ file system. Otherwise, the file is removed from the local file system.
+ Default Value: False
+ Types: bool
+
  RETURNS:
  1. teradataml DataFrame if exec_mode is "IN-DB".
  2. Pandas DataFrame if exec_mode is "LOCAL".
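A hedged usage sketch of the new "debug" argument documented above (the user function and DataFrame are illustrative, not taken from the original examples):

    >>> def increase_by_one(row):
    ...     row['Feb'] = row['Feb'] + 1
    ...     return row
    >>> # Keep the generated script on the local file system and print its path,
    >>> # which helps when the Script Table Operator run fails.
    >>> df.map_row(increase_by_one, debug=True)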
@@ -11523,6 +11950,7 @@ class DataFrame():
  sort_ascending = kwargs.pop('sort_ascending', True)
  auth = kwargs.pop('auth', None)
  charset = kwargs.pop('charset', None)
+ debug = kwargs.pop('debug', False)
 
  # Check for other extra/unknown arguments.
  unknown_args = list(kwargs.keys())
@@ -11541,7 +11969,7 @@ class DataFrame():
  sort_ascending=sort_ascending,
  returns=returns, delimiter=delimiter,
  quotechar=quotechar, auth=auth,
- charset=charset, num_rows=num_rows)
+ charset=charset, num_rows=num_rows, debug=debug)
 
  return tbl_op_util.execute()
 
@@ -11558,6 +11986,10 @@ class DataFrame():
  DESCRIPTION:
  Function to apply a user defined function to a group or partition of rows
  in the teradataml DataFrame, leveraging Vantage's Script Table Operator.
+ Notes:
+ 1. The function requires the same Python version in both the Vantage and the local environment.
+ 2. Teradata recommends using the "dill" package with the same version in both the Vantage and
+ the local environment.
 
  PARAMETERS:
  user_function:
@@ -11768,6 +12200,15 @@ class DataFrame():
  Default Value: True
  Types: bool
 
+ debug:
+ Optional Argument.
+ Specifies whether to display the path of the script file generated during function execution.
+ This argument helps in debugging failures during function execution. When set to True,
+ the function displays the path of the script and does not remove the file from the local
+ file system. Otherwise, the file is removed from the local file system.
+ Default Value: False
+ Types: bool
+
  RETURNS:
  1. teradataml DataFrame if exec_mode is "IN-DB".
  2. Pandas DataFrame if exec_mode is "LOCAL".
@@ -11933,6 +12374,7 @@ class DataFrame():
  sort_ascending = kwargs.pop('sort_ascending', True)
  auth = kwargs.pop('auth', None)
  charset = kwargs.pop('charset', None)
+ debug = kwargs.pop('debug', False)
 
  # Check for other extra/unknown arguments.
  unknown_args = list(kwargs.keys())
@@ -11951,7 +12393,7 @@ class DataFrame():
  sort_ascending=sort_ascending,
  returns=returns, delimiter=delimiter,
  quotechar=quotechar, auth=auth,
- charset=charset, num_rows=num_rows)
+ charset=charset, num_rows=num_rows, debug=debug)
 
  return tbl_op_util.execute()
 
@@ -11968,9 +12410,9 @@ class DataFrame():
  teradataml DataFrame, leveraging Apply Table Operator of Open
  Analytics Framework.
  Notes:
- 1. The function requires dill package with same version in both remote environment
- and local environment.
- 2. Teradata recommends to use same Python version in both remote and local environment.
+ 1. The function requires the same Python version in both the remote and the local environment.
+ 2. Teradata recommends using the "dill" package with the same version in both the remote and
+ the local environment.
 
  PARAMETERS:
  user_function:
@@ -12153,6 +12595,15 @@ class DataFrame():
  Default value: "csv"
  Types: str
 
+ debug:
+ Optional Argument.
+ Specifies whether to display the path of the script file generated during function execution.
+ This argument helps in debugging failures during function execution. When set to True,
+ the function displays the path of the script and does not remove the file from the local
+ file system. Otherwise, the file is removed from the local file system.
+ Default Value: False
+ Types: bool
+
  RETURNS:
  teradataml DataFrame.
 
@@ -12329,6 +12780,7 @@ class DataFrame():
  is_local_order = kwargs.pop('is_local_order', False)
  nulls_first = kwargs.pop('nulls_first', True)
  sort_ascending = kwargs.pop('sort_ascending', True)
+ debug = kwargs.pop('debug', False)
 
  # Check for other extra/unknown arguments.
  unknown_args = list(kwargs.keys())
@@ -12351,7 +12803,8 @@ class DataFrame():
  charset=None,
  num_rows=num_rows,
  env_name=env_name,
- style=style)
+ style=style,
+ debug=debug)
 
  return tbl_op_util.execute()
 
@@ -12696,8 +13149,8 @@ class DataFrame():
  _Validators._validate_column_exists_in_dataframe(column_names, self._metaexpr,
  False)
  column_names = list(dict.fromkeys(column_names))
-
- if list_td_reserved_keywords(column_names):
+
+ if list_td_reserved_keywords(column_names) or UtilFuncs._is_ascii(column_names):
  column_names = UtilFuncs._teradata_quote_arg(column_names, "\"", False)
 
  col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, column_names)
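The hunk above widens the condition under which column names are double-quoted before being embedded in SQL. The quoting itself is essentially identifier escaping; a plain-Python illustration of the effect (not the teradataml helper itself):

    def quote_identifier(name: str) -> str:
        # Double any embedded quotes, then wrap the whole name in double quotes,
        # e.g. month -> "month", so reserved keywords still parse as column names.
        return '"' + name.replace('"', '""') + '"'

    print(quote_identifier("month"))   # "month" - MONTH is a Teradata reserved keyword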
@@ -14617,7 +15070,18 @@ class DataFrame():
  >>> plot.show()
 
  """
- return _Plot(x=x, y=y, scale=scale, kind=kind, **kwargs)
+
+ _plot = _Plot(x=x, y=y, scale=scale, kind=kind, **kwargs)
+ # If plot is already generated, return the same plot.
+ if self._plot is None:
+ self._plot = _plot
+ return _plot
+
+ if self._plot == _plot:
+ return self._plot
+ else:
+ self._plot = _plot
+ return _plot
 
  @collect_queryband(queryband="DF_itertuples")
  def itertuples(self, name='Row', num_rows=None):
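The hunk above memoizes the last plot built on the DataFrame and hands back the cached object when an equivalent plot is requested again. The same pattern in a stripped-down form (assumes the plot object supports value-based equality, as the comparison in the hunk implies):

    class PlotCache:
        def __init__(self):
            self._last = None

        def get(self, new_plot):
            # Reuse the cached plot when an equal one was already built;
            # otherwise remember the new plot and return it.
            if self._last is not None and self._last == new_plot:
                return self._last
            self._last = new_plot
            return new_plot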
@@ -15057,7 +15521,7 @@ class DataFrameGroupBy(DataFrame):
  from sqlalchemy.sql.functions import Function
  return (type(None), int, float, str, decimal.Decimal, Function, ColumnExpression, ClauseElement)
 
- def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
+ def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, node_id, **kwargs):
  """
  DESCRIPTION:
  Function generates the MetaExpression and AED nodeid for DataFrameGroupBy.assign()
@@ -15070,6 +15534,11 @@ class DataFrameGroupBy(DataFrame):
  and grouping columns are returned. This is unused argument.
  Types: bool
 
+ node_id:
+ Optional Argument.
+ Specifies the input nodeid for the assign operation. This is unused argument.
+ Types: str
+
  kwargs:
  keyword, value pairs
  - keywords are the column names.
@@ -17510,11 +17979,18 @@ class _TDUAF(DataFrame):
  table_name = self._db_utils._execute_node_return_db_object_name(self._data._nodeid, self._data._metaexpr)
 
  # UAF Functions do not accept double quotes.
+ tdp = preparer(td_dialect)
  db_name = UtilFuncs._extract_db_name(table_name)
- if db_name:
- table_name = '"{}"."{}"'.format(db_name, UtilFuncs._extract_table_name(table_name))
+ datalake_name = UtilFuncs._extract_datalake_name(table_name)
+ if datalake_name:
+ table_name = '{}.{}.{}'.format(tdp.quote(datalake_name),
+ tdp.quote(db_name),
+ tdp.quote(UtilFuncs._extract_table_name(table_name)))
+ elif db_name:
+ table_name = '{}.{}'.format(tdp.quote(db_name),
+ tdp.quote(UtilFuncs._extract_table_name(table_name)))
  else:
- table_name = UtilFuncs._extract_table_name(table_name)
+ table_name = tdp.quote(UtilFuncs._extract_table_name(table_name))
 
  sql_clauses.append("TABLE_NAME ({})")
  sql_values.append(table_name)
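The hunk above assembles a fully qualified datalake.database.table name, quoting each part only when the dialect requires it. Roughly the same behavior can be seen with SQLAlchemy's identifier preparer directly (a generic dialect is used here purely for illustration; it is not the td_dialect object from the source):

    from sqlalchemy.engine import default

    # quote() adds double quotes only for identifiers that need them
    # (reserved words, spaces, mixed case) and leaves plain names alone.
    tdp = default.DefaultDialect().identifier_preparer
    parts = ["my_datalake", "sales_db", "Orders 2024"]
    print(".".join(tdp.quote(p) for p in parts))   # my_datalake.sales_db."Orders 2024"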