teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (126)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -19,6 +19,10 @@ import pandas as pd
19
19
  import re
20
20
  import sqlalchemy
21
21
  import sys
22
+ import urllib.parse
23
+
24
+ from sqlalchemy import Column
25
+
22
26
  import teradataml.context.context as tdmlctx
23
27
 
24
28
  from collections import OrderedDict, namedtuple
@@ -30,6 +34,7 @@ from teradataml.dataframe.sql_interfaces import ColumnExpression
30
34
  from teradataml.dataframe.sql_functions import case
31
35
  from teradataml.series.series import Series
32
36
  from teradatasqlalchemy.types import _TDType, BIGINT, INTEGER, PERIOD_TIMESTAMP, SMALLINT, BYTEINT, FLOAT, DECIMAL
37
+ from teradataml.common.deprecations import argument_deprecation
33
38
  from teradataml.common.utils import UtilFuncs
34
39
  from teradataml.common.exceptions import TeradataMlException
35
40
  from teradataml.common.messages import Messages
@@ -41,9 +46,11 @@ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils, Dat
41
46
  from teradataml.dataframe.indexer import _LocationIndexer
42
47
  from teradataml.common.aed_utils import AedUtils
43
48
  from teradataml.options.display import display
49
+ from teradataml.options.configure import configure
44
50
  from teradataml.dataframe.copy_to import copy_to_sql
45
51
  from teradataml.dataframe.row import _Row
46
52
  from teradataml.dataframe.setop import concat
53
+ from teradataml.dbutils.dbutils import list_td_reserved_keywords
47
54
  from teradataml.plot.plot import _Plot
48
55
  from teradataml.scriptmgmt.UserEnv import UserEnv
49
56
  from teradataml.utils.dtypes import _Dtypes, _ListOf, _TupleOf
@@ -57,10 +64,83 @@ from teradataml.common.bulk_exposed_utils import _validate_unimplemented_functio
57
64
  from teradataml.telemetry_utils.queryband import collect_queryband
58
65
  from teradataml.options.configure import configure
59
66
  from teradataml.utils.internal_buffer import _InternalBuffer
67
+ from teradataml.common.constants import OutputStyle
60
68
 
61
69
  # TODO use logger when available on master branch
62
70
  # logger = teradatapylog.getLogger()
63
- in_schema = UtilFuncs._in_schema
71
+
72
+ class in_schema:
73
+ """
74
+ Class takes a schema name, a table name and an optional datalake name
75
+ and creates an object that can be passed to DataFrame.
76
+ Note:
77
+ teradataml recommends using this class to access table(s)/view(s)
78
+ from a database other than the default database.
79
+ """
80
+ def __init__(self, schema_name, table_name, datalake_name=None):
81
+ """
82
+ Constructor for in_schema class.
83
+
84
+ PARAMETERS:
85
+ schema_name:
86
+ Required Argument.
87
+ Specifies the schema where the table resides.
88
+ Types: str
89
+
90
+ table_name:
91
+ Required Argument.
92
+ Specifies the table name or view name in Vantage.
93
+ Types: str
94
+
95
+ datalake_name:
96
+ Optional Argument.
97
+ Specifies the datalake name.
98
+ Types: str
99
+
100
+ EXAMPLES:
101
+ from teradataml.dataframe.dataframe import in_schema, DataFrame
102
+
103
+ # Example 1: The following example creates a DataFrame from the
104
+ # existing Vantage table "dbcinfo" in the non-default
105
+ # database "dbc" using the in_schema instance.
106
+ df = DataFrame(in_schema("dbc", "dbcinfo"))
107
+
108
+ # Example 2: The following example uses from_table() function, existing
109
+ # Vantage table "dbcinfo" and non-default database "dbc" to
110
+ # create a teradataml DataFrame.
111
+ df = DataFrame.from_table(in_schema("dbc","dbcinfo"))
112
+
113
+ # Example 3: The following example uses "in_schema" object created
114
+ # with "datalake_name" argument to create DataFrame on OTF table.
115
+ otf_df = DataFrame(in_schema("datalake_db","datalake_table","datalake"))
116
+
117
+ """
118
+ self.schema_name = schema_name
119
+ self.table_name = table_name
120
+ self.datalake_name = datalake_name
121
+
122
+ awu_matrix = []
123
+ awu_matrix.append(["schema_name", schema_name, False, (str), True])
124
+ awu_matrix.append(["table_name", table_name, False, (str), True])
125
+ awu_matrix.append(["datalake_name", datalake_name, True, (str), True])
126
+
127
+ # Validate argument types
128
+ _Validators._validate_function_arguments(awu_matrix)
129
+
130
+ def __str__(self):
131
+ """
132
+ Returns the string representation of in_schema instance.
133
+ """
134
+ tbl_name = '{}.{}'.format(UtilFuncs._teradata_quote_arg(self.schema_name, "\"", False),
135
+ UtilFuncs._teradata_quote_arg(self.table_name, "\"", False))
136
+
137
+ if not self.datalake_name:
138
+ return tbl_name
139
+
140
+ return '{}.{}'.format(UtilFuncs._teradata_quote_arg(self.datalake_name, "\"", False), tbl_name)
141
+
142
+
143
+ in_schema = in_schema
64
144
 
65
145
 
66
146
  class DataFrame():
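The in_schema class added above replaces the old UtilFuncs._in_schema alias with a small object whose __str__ builds the fully quoted object name. A minimal sketch of the expected rendering, inferred only from the __str__ implementation shown above (the literal output strings are an inference, not output captured from the package):

>>> from teradataml.dataframe.dataframe import in_schema
>>> str(in_schema("dbc", "dbcinfo"))                              # schema plus table
'"dbc"."dbcinfo"'
>>> str(in_schema("datalake_db", "datalake_table", "datalake"))   # datalake name prepended
'"datalake"."datalake_db"."datalake_table"'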
@@ -163,6 +243,19 @@ class DataFrame():
163
243
  # Property to determine if table is an ART table or not.
164
244
  self._is_art = None
165
245
 
246
+ self._datalake = None
247
+ self._database = None
248
+ self._table = None
249
+ self._otf = False
250
+
251
+ if isinstance(table_name, in_schema):
252
+ self._table = table_name.table_name
253
+ self._datalake = table_name.datalake_name
254
+ self._database = table_name.schema_name
255
+ self._otf = True if self._datalake else False
256
+
257
+ table_name = str(table_name) if isinstance(table_name, in_schema) else table_name
258
+
166
259
  # Below matrix is list of list, where in each row contains following elements:
167
260
  # Let's take an example of following, just to get an idea:
168
261
  # [element1, element2, element3, element4, element5, element6]
@@ -195,25 +288,45 @@ class DataFrame():
195
288
  self._source_type = SourceType.TABLE.value
196
289
  self._nodeid = self._aed_utils._aed_table(self._table_name)
197
290
  elif query is not None:
291
+ query = query.strip()
292
+ query = query[:-1] if query[-1] == ";" else query
293
+
198
294
  self._query = query
199
295
  self._source_type = SourceType.QUERY.value
200
296
 
201
- if materialize:
202
- # If user requests to materialize the the query, then we should create a
297
+ temp_obj_params = {
298
+ "prefix": "_frmqry_v",
299
+ "use_default_database": True,
300
+ "quote": False
301
+ }
302
+ __execute = UtilFuncs._create_view
303
+
304
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
305
+ # If user requests to materialize the query, then we should create a
306
+ # volatile table instead of a view, if the user has configured the temporary object type so.
307
+ # Volatile table does not need to be added to the GC.
308
+ temp_obj_params["table_type"] = TeradataConstants.TERADATA_VOLATILE_TABLE
309
+ temp_obj_params["gc_on_quit"] = False
310
+ temp_obj_params["prefix"] = "_frmqry_vt"
311
+ __execute = UtilFuncs._create_table
312
+
313
+ elif materialize:
314
+ # If user requests to materialize the query, then we should create a
203
315
  # table instead of view and add the same in the GarbageCollector.
204
- temp_table_name = UtilFuncs._generate_temp_table_name(prefix="_frmqry_t", use_default_database=True,
205
- quote=False,
206
- table_type=TeradataConstants.TERADATA_TABLE)
207
- else:
208
- temp_table_name = UtilFuncs._generate_temp_table_name(prefix="_frmqry_v", use_default_database=True,
209
- quote=False)
316
+ temp_obj_params["table_type"] = TeradataConstants.TERADATA_TABLE
317
+ temp_obj_params["gc_on_quit"] = True
318
+ temp_obj_params["prefix"] = "_frmqry_t"
319
+ __execute = UtilFuncs._create_table
210
320
 
321
+ temp_table_name = UtilFuncs._generate_temp_table_name(**temp_obj_params)
211
322
  self._table_name = temp_table_name
323
+ __execute_params = (self._table_name, self._query)
324
+
325
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
326
+ __execute_params = (self._table_name, self._query, True)
327
+
212
328
  try:
213
- if materialize:
214
- UtilFuncs._create_table(self._table_name, self._query)
215
- else:
216
- UtilFuncs._create_view(self._table_name, self._query)
329
+ __execute(*__execute_params)
217
330
  except OperationalError as oe:
218
331
  if "[Error 3707] Syntax error" in str(oe):
219
332
  raise ValueError(Messages.get_message(
@@ -229,7 +342,7 @@ class DataFrame():
229
342
 
230
343
  self._nodeid = self._aed_utils._aed_query(self._query, temp_table_name)
231
344
  else:
232
- if inspect.stack()[1][3] not in ['_from_node', '__init__']:
345
+ if inspect.stack()[1][3] not in ['_from_node', '__init__', 'alias']:
233
346
  raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
234
347
  MessageCodes.TDMLDF_CREATE_FAIL)
235
348
 
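With the __init__ changes above, DataFrame.from_query() strips a trailing semicolon from the query and backs it with a view, a garbage-collected table (materialize=True), or a volatile table when configure.temp_object_type is set to the volatile-table constant. A rough usage sketch; assigning the TeradataConstants member directly to configure.temp_object_type, and that import path, are assumptions based on the comparison in the code rather than documented settings:

>>> from teradataml import DataFrame
>>> from teradataml.options.configure import configure
>>> from teradataml.common.constants import TeradataConstants          # assumed import path
>>> df = DataFrame.from_query("SELECT * FROM sales;")                  # trailing ";" is now stripped
>>> configure.temp_object_type = TeradataConstants.TERADATA_VOLATILE_TABLE   # hypothetical toggle
>>> df_vt = DataFrame.from_query("SELECT * FROM sales")                # backed by a "_frmqry_vt" volatile table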
@@ -241,6 +354,10 @@ class DataFrame():
241
354
  self._iloc = _LocationIndexer(self, integer_indexing=True)
242
355
  self.__data = None
243
356
  self.__data_columns = None
357
+ self._alias = None
358
+ self._plot = None
359
+
360
+ self._eda_ui = None
244
361
 
245
362
  except TeradataMlException:
246
363
  raise
@@ -250,9 +367,106 @@ class DataFrame():
250
367
  raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
251
368
  MessageCodes.TDMLDF_CREATE_FAIL) from err
252
369
 
370
+ @property
371
+ def db_object_name(self):
372
+ """
373
+ DESCRIPTION:
374
+ Get the underlying database object name, on which DataFrame is
375
+ created.
376
+
377
+ RETURNS:
378
+ str representing object name of DataFrame
379
+
380
+ EXAMPLES:
381
+ >>> load_example_data("dataframe", "sales")
382
+ >>> df = DataFrame('sales')
383
+ >>> df.db_object_name
384
+ '"sales"'
385
+ """
386
+ if self._table_name is not None:
387
+ return self._table_name
388
+ else:
389
+ msg = "Object name is available once DataFrame is materialized. " \
390
+ "Use DataFrame.materialize() to materialize DataFrame."
391
+ print(msg)
392
+
393
+ def alias(self, alias_name):
394
+ """
395
+ DESCRIPTION:
396
+ Method to create an aliased teradataml DataFrame.
397
+ Note:
398
+ * This method is recommended to be used before performing
399
+ self join using DataFrame's join() API.
400
+
401
+ PARAMETERS:
402
+ alias_name:
403
+ Required Argument.
404
+ Specifies the alias name to be assigned to a teradataml DataFrame.
405
+ Types: str
406
+
407
+ RETURNS:
408
+ teradataml DataFrame
409
+
410
+ EXAMPLES:
411
+ >>> load_example_data("dataframe", "admissions_train")
412
+ >>> df = DataFrame("admissions_train")
413
+ >>> df
414
+ masters gpa stats programming admitted
415
+ id
416
+ 13 no 4.00 Advanced Novice 1
417
+ 26 yes 3.57 Advanced Advanced 1
418
+ 5 no 3.44 Novice Novice 0
419
+ 19 yes 1.98 Advanced Advanced 0
420
+ 15 yes 4.00 Advanced Advanced 1
421
+ 40 yes 3.95 Novice Beginner 0
422
+ 7 yes 2.33 Novice Novice 1
423
+ 22 yes 3.46 Novice Beginner 0
424
+ 36 no 3.00 Advanced Novice 0
425
+ 38 yes 2.65 Advanced Beginner 1
426
+
427
+ # Example 1: Create an alias of teradataml DataFrame.
428
+
429
+ >>> df2 = df.alias("adm_trn")
430
+
431
+ # Print aliased DataFrame.
432
+ >>> df2
433
+ masters gpa stats programming admitted
434
+ id
435
+ 13 no 4.00 Advanced Novice 1
436
+ 26 yes 3.57 Advanced Advanced 1
437
+ 5 no 3.44 Novice Novice 0
438
+ 19 yes 1.98 Advanced Advanced 0
439
+ 15 yes 4.00 Advanced Advanced 1
440
+ 40 yes 3.95 Novice Beginner 0
441
+ 7 yes 2.33 Novice Novice 1
442
+ 22 yes 3.46 Novice Beginner 0
443
+ 36 no 3.00 Advanced Novice 0
444
+ 38 yes 2.65 Advanced Beginner 1
445
+ """
446
+ arg_info_matrix = [["alias_name", alias_name, False, (str), True]]
447
+ _Validators._validate_function_arguments(arg_info_matrix)
448
+ try:
449
+ alias_df = self._from_node(self._nodeid, self._metaexpr, self._index_label,
450
+ reuse_metaexpr=False, _datalake=self._datalake,
451
+ _database=self._database, _table=self._table,
452
+ _otf=self._otf)
453
+ # Assigning self attributes to newly created alias dataframe.
454
+ alias_df._table_name = self._table_name
455
+ alias_df._index = self._index
456
+ alias_df._index_label = self._index_label
457
+ setattr(alias_df._metaexpr.t, "table_alias", alias_name)
458
+ alias_df._alias = alias_name
459
+ return alias_df
460
+ except Exception as err:
461
+ error_code = MessageCodes.EXECUTION_FAILED
462
+ error_msg = Messages.get_message(
463
+ error_code, "create alias dataFrame", '{}'.format(str(err)))
464
+ raise TeradataMlException(error_msg, error_code)
465
+
253
466
  @classmethod
254
467
  @collect_queryband(queryband="DF_fromTable")
255
- def from_table(cls, table_name, index=True, index_label=None):
468
+ def from_table(cls, table_name, index=True, index_label=None,
469
+ schema_name=None, datalake_name=None):
256
470
  """
257
471
  Class method for creating a DataFrame from a table or a view.
258
472
 
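db_object_name and alias() above expose the underlying object name and aliased views of a DataFrame. A small illustrative sketch, assuming the sales example table is loaded (the quoted name is taken from the docstring above):

>>> df = DataFrame("sales")
>>> df.db_object_name
'"sales"'
>>> sales_rhs = df.alias("sales_rhs")   # aliased copy, usable as the right side of a self join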
@@ -273,30 +487,48 @@ class DataFrame():
273
487
  Column/s used for sorting.
274
488
  Types: str
275
489
 
490
+ schema_name:
491
+ Optional Argument.
492
+ Specifies the schema where the table resides.
493
+ Types: str
494
+
495
+ datalake_name:
496
+ Optional Argument.
497
+ Specifies the datalake name.
498
+ Types: str
499
+
276
500
  EXAMPLES:
277
- from teradataml.dataframe.dataframe import DataFrame
501
+ >>> from teradataml.dataframe.dataframe import DataFrame
278
502
 
279
503
  # Example 1: The following example creates a DataFrame from a table or
280
504
  a view.
281
505
  # Load the example data.
282
- load_example_data("dataframe","sales")
506
+ >>> load_example_data("dataframe","sales")
283
507
 
284
508
  # Create DataFrame from table
285
- df = DataFrame.from_table('sales')
509
+ >>> df = DataFrame.from_table('sales')
286
510
 
287
511
  # Create DataFrame from table and without index column sorting.
288
- df = DataFrame.from_table("sales", False)
512
+ >>> df = DataFrame.from_table("sales", False)
289
513
 
290
514
  # Create DataFrame from table and sorting using the 'accounts'
291
515
  # column.
292
- df = DataFrame.from_table("sales", True, "accounts")
516
+ >>> df = DataFrame.from_table("sales", True, "accounts")
293
517
 
294
518
  # Example 2: The following example creates a DataFrame from existing Vantage
295
519
  # table "dbcinfo" in the non-default database "dbc" using the
296
520
  # in_schema() function.
297
521
 
298
- from teradataml.dataframe.dataframe import in_schema
299
- df = DataFrame.from_table(in_schema("dbc", "dbcinfo"))
522
+ >>> from teradataml.dataframe.dataframe import in_schema
523
+ >>> df = DataFrame.from_table(in_schema("dbc", "dbcinfo"))
524
+
525
+ # Example 3: Create a DataFrame on existing DataLake
526
+ # table "lake_table" in the "datalake_database" database
527
+ # in "datalake" datalake.
528
+
529
+ >>> datalake_df = DataFrame.from_table(table_name="lake_table",
530
+ ... schema_name="datalake_database",
531
+ ... datalake_name="datalake" )
300
532
 
301
533
  RETURNS:
302
534
  DataFrame
@@ -305,6 +537,9 @@ class DataFrame():
305
537
  TeradataMlException - TDMLDF_CREATE_FAIL
306
538
 
307
539
  """
540
+ if schema_name:
541
+ return cls(in_schema(schema_name, table_name, datalake_name))
542
+
308
543
  return cls(table_name, index, index_label)
309
544
 
310
545
  @classmethod
@@ -364,7 +599,7 @@ class DataFrame():
364
599
  return cls(index=index, index_label=index_label, query=query, materialize=materialize)
365
600
 
366
601
  @classmethod
367
- def _from_node(cls, nodeid, metaexpr, index_label=None, undropped_index=None):
602
+ def _from_node(cls, nodeid, metaexpr, index_label=None, undropped_index=None, reuse_metaexpr=True, **kwargs):
368
603
  """
369
604
  Private class method for creating a DataFrame from a nodeid and parent metadata.
370
605
 
@@ -385,6 +620,12 @@ class DataFrame():
385
620
  Optional Argument.
386
621
  List specifying index column(s) to be retained as columns for printing.
387
622
 
623
+ reuse_metaexpr:
624
+ Optional Argument.
625
+ Specifies the flag to decide whether to use same _MetaExpression object or not.
626
+ Default Value: True
627
+ Types: bool
628
+
388
629
  EXAMPLES:
389
630
  from teradataml.dataframe.dataframe import DataFrame
390
631
  df = DataFrame._from_node(1234, metaexpr)
@@ -400,30 +641,50 @@ class DataFrame():
400
641
  df = cls()
401
642
  df._nodeid = nodeid
402
643
  df._source_type = SourceType.TABLE.value
403
- df._get_metadata_from_metaexpr(metaexpr)
644
+
645
+ if not reuse_metaexpr:
646
+ # Create new _MetaExpression object using reference metaExpression
647
+ # for newly created DataFrame.
648
+ df._metaexpr = UtilFuncs._get_metaexpr_using_parent_metaexpr(nodeid, metaexpr)
649
+ # When metaexpression is created using only column information from parent DataFrame,
650
+ # underlying SQLAlchemy table is created with '' string as Table name.
651
+ # Assign name from reference metaexpression here.
652
+ df._metaexpr.t.name = metaexpr.t.name
653
+ # Populate corresponding information into newly created DataFrame object
654
+ # using newly created metaExpression.
655
+ df._get_metadata_from_metaexpr(df._metaexpr)
656
+ else:
657
+ # Populate corresponding information into newly created DataFrame object
658
+ # using reference metaExpression.
659
+ df._get_metadata_from_metaexpr(metaexpr)
404
660
 
405
661
  if isinstance(index_label, str):
406
662
  index_label = [index_label]
407
663
 
408
- if index_label is not None and all(elem in [col.name for col in metaexpr.c] for elem in index_label):
664
+ if index_label is not None and all(elem in [col.name for col in df._metaexpr.c] for elem in index_label):
409
665
  df._index_label = index_label
410
666
  elif index_label is not None and all(UtilFuncs._teradata_quote_arg(elem, "\"", False)
411
- in [col.name for col in metaexpr.c] for elem in index_label):
667
+ in [col.name for col in df._metaexpr.c] for elem in index_label):
412
668
  df._index_label = index_label
413
669
 
414
670
  # Set the flag suggesting that the _index_label is set,
415
- # and that a database lookup wont be required even when it is None.
671
+ # and that a database lookup won't be required even when it is None.
416
672
  df._index_query_required = False
417
673
 
418
674
  if isinstance(undropped_index, str):
419
675
  undropped_index = [undropped_index]
420
676
 
421
- if undropped_index is not None and all(elem in [col.name for col in metaexpr.c] for elem in undropped_index):
677
+ if undropped_index is not None and all(elem in [col.name for col in df._metaexpr.c] for elem in undropped_index):
422
678
  df._undropped_index = undropped_index
423
679
  elif undropped_index is not None and all(UtilFuncs._teradata_quote_arg(elem, "\"", False)
424
- in [col.name for col in metaexpr.c] for elem in undropped_index):
680
+ in [col.name for col in df._metaexpr.c] for elem in undropped_index):
425
681
  df._undropped_index = undropped_index
426
682
 
683
+ # Populate remaining attributes.
684
+ for arg in kwargs:
685
+ # Pop each argument from kwargs and assign to new DataFrame.
686
+ arg_value = kwargs.get(arg)
687
+ df.__setattr__(arg, arg_value)
427
688
  return df
428
689
 
429
690
  def create_temp_view(self, name):
@@ -551,9 +812,10 @@ class DataFrame():
551
812
  return self
552
813
 
553
814
  @collect_queryband(queryband="DF_fillna")
554
- def fillna(self, value=None, columns=None, literal_value=False):
815
+ def fillna(self, value=None, columns=None, literal_value=False, partition_column=None):
555
816
  """
556
- Method to replace the null values in a column with the value specified.
817
+ DESCRIPTION:
818
+ Method to replace the null values in a column with the value specified.
557
819
 
558
820
  PARAMETERS:
559
821
  value:
@@ -586,6 +848,12 @@ class DataFrame():
586
848
  Default Value: False
587
849
  Types: bool
588
850
 
851
+ partition_column:
852
+ Optional Argument.
853
+ Specifies the column name to partition the data.
854
+ Default Value: None
855
+ Types: str
856
+
589
857
  RETURNS:
590
858
  teradataml DataFrame
591
859
 
@@ -626,6 +894,26 @@ class DataFrame():
626
894
  3 Blue Inc 90.0 50 95.0 101.0 17/01/04
627
895
  4 Alpha Co 210.0 200 215.0 250.0 17/01/04
628
896
  5 Orange Inc 210.0 50 NaN 250.0 17/01/04
897
+
898
+ # Example 3: Populate the null values in the 'pclass' and
899
+ # 'fare' columns with the mean value, partitioning
900
+ # the data by the 'sex' column.
901
+ # Load the example data.
902
+ >>> load_example_data("teradataml", ["titanic"])
903
+ >>> df = DataFrame.from_table("titanic")
904
+
905
+ >>> df.fillna(value="mean", columns=["pclass", "fare"], partition_column="sex")
906
+ passenger survived pclass name sex age sibsp parch ticket fare cabin embarked
907
+ 0 284 1 3 Dorking, Mr. Edward Arthur male 19.0 0 0 A/5. 10482 8.0500 None S
908
+ 1 589 0 3 Gilinski, Mr. Eliezer male 22.0 0 0 14973 8.0500 None S
909
+ 2 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 None Q
910
+ 3 282 0 3 Olsson, Mr. Nils Johan Goransson male 28.0 0 0 347464 7.8542 None S
911
+ 4 608 1 1 Daniel, Mr. Robert Williams male 27.0 0 0 113804 30.5000 None S
912
+ 5 404 0 3 Hakkarainen, Mr. Pekka Pietari male 28.0 1 0 STON/O2. 3101279 15.8500 None S
913
+ 6 427 1 2 Clarke, Mrs. Charles V (Ada Maria Winfield) female 28.0 1 0 2003 26.0000 None S
914
+ 7 141 0 3 Boulos, Mrs. Joseph (Sultana) female NaN 0 2 2678 15.2458 None C
915
+ 8 610 1 1 Shutes, Miss. Elizabeth W female 40.0 0 0 PC 17582 153.4625 C125 S
916
+ 9 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 0 P/PP 3381 24.0000 None C
629
917
  """
630
918
  from teradataml import SimpleImputeFit, SimpleImputeTransform
631
919
 
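The hunks that follow route the new partition_column argument of fillna() into SimpleImputeFit and its transform() call. A hedged sketch of the roughly equivalent explicit calls for the titanic example above; the argument names are copied from the code in the next hunks rather than from the SimpleImputeFit documentation:

>>> from teradataml import SimpleImputeFit
>>> fit_obj = SimpleImputeFit(data=df, stats="mean", stats_columns=["pclass", "fare"],
...                           partition_column="sex")
>>> filled = fit_obj.transform(data=df, data_partition_column="sex",
...                            object_partition_column="sex").result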
@@ -633,6 +921,7 @@ class DataFrame():
633
921
  arg_info_matrix.append(["value", value, True, (int, float, str, dict, list)])
634
922
  arg_info_matrix.append(["columns", columns, True, (list, str, tuple)])
635
923
  arg_info_matrix.append(["literal_value", literal_value, True, (bool)])
924
+ arg_info_matrix.append(["partition_column", partition_column, True, (str)])
636
925
 
637
926
  # Validate argument types
638
927
  _Validators._validate_function_arguments(arg_info_matrix)
@@ -704,9 +993,15 @@ class DataFrame():
704
993
  literals=literals,
705
994
  literals_columns=literals_columns,
706
995
  stats=stats,
707
- stats_columns=stats_columns)
996
+ stats_columns=stats_columns,
997
+ partition_column=partition_column)
708
998
 
709
- return fit_obj.transform(data=self).result
999
+ impute_transform = {
1000
+ 'data': self,
1001
+ 'data_partition_column': partition_column,
1002
+ 'object_partition_column': partition_column}
1003
+
1004
+ return fit_obj.transform(**impute_transform).result
710
1005
 
711
1006
  def __execute_node_and_set_table_name(self, nodeid, metaexpr=None):
712
1007
  """
@@ -789,7 +1084,10 @@ class DataFrame():
789
1084
  Private method for setting _metaexpr and retrieving column names and types.
790
1085
 
791
1086
  PARAMETERS:
792
- metaexpr - Parent meta data (_MetaExpression object).
1087
+ metaexpr:
1088
+ Required Argument.
1089
+ Specifies parent meta data (_MetaExpression object).
1090
+ Types: _MetaExpression
793
1091
 
794
1092
  RETURNS:
795
1093
  None
@@ -802,15 +1100,19 @@ class DataFrame():
802
1100
  self._column_names_and_types = []
803
1101
  self._td_column_names_and_types = []
804
1102
  self._td_column_names_and_sqlalchemy_types = {}
805
- for col in metaexpr.c:
1103
+ self._column_types = {}
1104
+
1105
+ for col in self._metaexpr.c:
806
1106
  if isinstance(col.type, sqlalchemy.sql.sqltypes.NullType):
807
1107
  tdtype = TeradataTypes.TD_NULL_TYPE.value
808
1108
  else:
809
1109
  tdtype = "{}".format(col.type)
810
1110
 
811
- self._column_names_and_types.append((str(col.name), UtilFuncs._teradata_type_to_python_type(col.type)))
1111
+ py_type = UtilFuncs._teradata_type_to_python_type(col.type)
1112
+ self._column_names_and_types.append((str(col.name), py_type))
812
1113
  self._td_column_names_and_types.append((str(col.name), tdtype))
813
1114
  self._td_column_names_and_sqlalchemy_types[(str(col.name)).lower()] = col.type
1115
+ self._column_types[(str(col.name)).lower()] = [py_type, col.type]
814
1116
 
815
1117
  def _get_metaexpr(self):
816
1118
  """
@@ -829,7 +1131,24 @@ class DataFrame():
829
1131
  meta = sqlalchemy.MetaData()
830
1132
  db_schema = UtilFuncs._extract_db_name(self._table_name)
831
1133
  db_table_name = UtilFuncs._extract_table_name(self._table_name)
832
- t = sqlalchemy.Table(db_table_name, meta, schema=db_schema, autoload_with=eng)
1134
+ if not self._datalake:
1135
+ t = sqlalchemy.Table(db_table_name, meta, schema=db_schema, autoload_with=eng)
1136
+ return _MetaExpression(t)
1137
+
1138
+ # Get metaexpression for datalake table.
1139
+ # check existence of datalake table.
1140
+ tdmlctx.get_connection().dialect.has_table(tdmlctx.get_connection(),
1141
+ self._table,
1142
+ schema=self._database,
1143
+ table_only=True,
1144
+ datalake=self._datalake)
1145
+
1146
+ # Extract column names and corresponding teradatasqlalchemy types.
1147
+ col_names, col_types = df_utils._get_datalake_table_columns_info(self._database,
1148
+ self._table,
1149
+ self._datalake)
1150
+ t = sqlalchemy.Table(self._table, meta, schema=self._database,
1151
+ *(Column(col_name, col_type) for col_name, col_type in zip(col_names, col_types)))
833
1152
  return _MetaExpression(t)
834
1153
 
835
1154
  def __getattr__(self, name):
@@ -2066,7 +2385,7 @@ class DataFrame():
2066
2385
  else:
2067
2386
  col_filters = col_names
2068
2387
 
2069
- col_filters_decode = ["decode(\"{}\", null, 0, 1)".format(col_name) for col_name in col_filters]
2388
+ col_filters_decode = ["CASE WHEN \"{}\" IS NULL THEN 0 ELSE 1 END".format(col_name) for col_name in col_filters]
2070
2389
  fmt_filter = " + ".join(col_filters_decode)
2071
2390
 
2072
2391
  if thresh is not None:
@@ -2605,9 +2924,10 @@ class DataFrame():
2605
2924
  msg = Messages.get_message(errcode)
2606
2925
  raise TeradataMlException(msg, errcode)
2607
2926
 
2927
+ @argument_deprecation("20.0.0.5", "include", False, None)
2608
2928
  @collect_queryband(queryband="DF_describe")
2609
2929
  def describe(self, percentiles=[.25, .5, .75], include=None, verbose=False, distinct=False, statistics=None,
2610
- columns=None):
2930
+ columns=None, pivot=False):
2611
2931
  """
2612
2932
  DESCRIPTION:
2613
2933
  Generates statistics for numeric columns. This function can be used in two modes:
@@ -2639,12 +2959,12 @@ class DataFrame():
2639
2959
  include:
2640
2960
  Optional Argument.
2641
2961
  Values can be either None or "all".
2642
- If the value is "all", then both numeric and non-numeric columns are included.
2962
+ If the value is "all", both numeric and non-numeric columns are included.
2643
2963
  Computes count, mean, std, min, percentiles, and max for numeric columns.
2644
2964
  Computes count and unique for non-numeric columns.
2645
2965
  If the value is None, only numeric columns are used for collecting statistics.
2646
2966
  Note:
2647
- Value 'all' is not applicable for 'Time Series Aggregate Mode'.
2967
+ * Value 'all' is not applicable for 'Time Series Aggregate Mode'.
2648
2968
  Default Values: None
2649
2969
  Types: str
2650
2970
 
@@ -2684,7 +3004,14 @@ class DataFrame():
2684
3004
  Specifies the name(s) of the columns we are collecting statistics for.
2685
3005
  Default Values: None
2686
3006
  Types: str or List of str
2687
-
3007
+
3008
+ pivot:
3009
+ Optional Argument.
3010
+ Specifies a boolean value to pivot the output.
3011
+ Note:
3012
+ * "pivot" is not supported for PTI tables.
3013
+ Default Values: False
3014
+ Types: bool
2688
3015
 
2689
3016
  RETURNS:
2690
3017
  teradataml DataFrame
@@ -2706,7 +3033,7 @@ class DataFrame():
2706
3033
  Orange Inc 210.0 None None 250 04/01/2017
2707
3034
 
2708
3035
  # Computes count, mean, std, min, percentiles, and max for numeric columns.
2709
- >>> df.describe()
3036
+ >>> df.describe(pivot=True)
2710
3037
  Apr Feb Mar Jan
2711
3038
  func
2712
3039
  count 4 6 4 4
@@ -2718,8 +3045,45 @@ class DataFrame():
2718
3045
  75% 250 207.5 158.75 162.5
2719
3046
  max 250 210 215 200
2720
3047
 
3048
+ # Computes count, mean, std, min, percentiles, and max for numeric columns with
3049
+ # default arguments.
3050
+ >>> df.describe()
3051
+ ATTRIBUTE StatName StatValue
3052
+ Jan MAXIMUM 200.0
3053
+ Jan STANDARD DEVIATION 62.91528696058958
3054
+ Jan PERCENTILES(25) 125.0
3055
+ Jan PERCENTILES(50) 150.0
3056
+ Mar COUNT 4.0
3057
+ Mar MINIMUM 95.0
3058
+ Mar MAXIMUM 215.0
3059
+ Mar MEAN 147.5
3060
+ Mar STANDARD DEVIATION 49.749371855331
3061
+ Mar PERCENTILES(25) 128.75
3062
+ Mar PERCENTILES(50) 140.0
3063
+ Apr COUNT 4.0
3064
+ Apr MINIMUM 101.0
3065
+ Apr MAXIMUM 250.0
3066
+ Apr MEAN 195.25
3067
+ Apr STANDARD DEVIATION 70.97123830585646
3068
+ Apr PERCENTILES(25) 160.25
3069
+ Apr PERCENTILES(50) 215.0
3070
+ Apr PERCENTILES(75) 250.0
3071
+ Feb COUNT 6.0
3072
+ Feb MINIMUM 90.0
3073
+ Feb MAXIMUM 210.0
3074
+ Feb MEAN 166.66666666666666
3075
+ Feb STANDARD DEVIATION 59.553897157672786
3076
+ Feb PERCENTILES(25) 117.5
3077
+ Feb PERCENTILES(50) 200.0
3078
+ Feb PERCENTILES(75) 207.5
3079
+ Mar PERCENTILES(75) 158.75
3080
+ Jan PERCENTILES(75) 162.5
3081
+ Jan MEAN 137.5
3082
+ Jan MINIMUM 50.0
3083
+ Jan COUNT 4.0
3084
+
2721
3085
  # Computes count, mean, std, min, percentiles, and max for numeric columns with 30th and 60th percentiles.
2722
- >>> df.describe(percentiles=[.3, .6])
3086
+ >>> df.describe(percentiles=[.3, .6], pivot=True)
2723
3087
  Apr Feb Mar Jan
2724
3088
  func
2725
3089
  count 4 6 4 4
@@ -2732,7 +3096,7 @@ class DataFrame():
2732
3096
 
2733
3097
  # Computes count, mean, std, min, percentiles, and max for numeric columns group by "datetime" and "Feb".
2734
3098
  >>> df1 = df.groupby(["datetime", "Feb"])
2735
- >>> df1.describe()
3099
+ >>> df1.describe(pivot=True)
2736
3100
  Jan Mar Apr
2737
3101
  datetime Feb func
2738
3102
  04/01/2017 90.0 25% 50 95 101
@@ -2760,22 +3124,6 @@ class DataFrame():
2760
3124
  min 200 215 250
2761
3125
  std None None 0
2762
3126
 
2763
- # Computes count, mean, std, min, percentiles, and max for numeric columns and
2764
- # computes count and unique for non-numeric columns
2765
- >>> df.describe(include="all")
2766
- accounts Feb Jan Mar Apr datetime
2767
- func
2768
- 25% None 117.5 125 128.75 160.25 None
2769
- 75% None 207.5 162.5 158.75 250 None
2770
- count 6 6 4 4 4 6
2771
- mean None 166.667 137.5 147.5 195.25 None
2772
- max None 210 200 215 250 None
2773
- min None 90 50 95 101 None
2774
- 50% None 200 150 140 215 None
2775
- std None 59.554 62.915 49.749 70.971 None
2776
- unique 6 None None None None 1
2777
-
2778
- #
2779
3127
  # Examples for describe() function as Time Series Aggregate.
2780
3128
  #
2781
3129
  >>> # Load the example datasets.
@@ -2958,7 +3306,7 @@ class DataFrame():
2958
3306
  >>>
2959
3307
  """
2960
3308
 
2961
- # Argument validations
3309
+ # -------------Argument validations---------------#
2962
3310
  awu_matrix = []
2963
3311
  awu_matrix.append(["columns", columns, True, (str, list), True])
2964
3312
  awu_matrix.append(["percentiles", percentiles, True, (float, list)])
@@ -2967,6 +3315,7 @@ class DataFrame():
2967
3315
  awu_matrix.append(["distinct", distinct, True, (bool)])
2968
3316
  awu_matrix.append(["statistics", statistics, True, (str, list), True,
2969
3317
  ["count", "mean", "min", "max", "unique", "std", "describe", "percentile"]])
3318
+ awu_matrix.append(["pivot", pivot, True, (bool)])
2970
3319
 
2971
3320
  # Validate argument types
2972
3321
  _Validators._validate_function_arguments(awu_matrix)
@@ -3010,22 +3359,27 @@ class DataFrame():
3010
3359
  if verbose and not isinstance(self, DataFrameGroupByTime):
3011
3360
  raise ValueError(Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format(
3012
3361
  'verbose', 'Aggregation', 'True', 'describe()', 'DataFrameGroupByTime'))
3362
+ # -------------End of argument validations---------------#
3013
3363
 
3014
3364
  function_label = "func"
3365
+ sort_cols = []
3015
3366
  try:
3016
3367
  self.__execute_node_and_set_table_name(self._nodeid)
3017
3368
 
3018
3369
  groupby_column_list = None
3019
- if isinstance(self, DataFrameGroupBy):
3370
+ if isinstance(self, DataFrameGroupByTime) or isinstance(self, DataFrameGroupBy):
3020
3371
  groupby_column_list = self.groupby_column_list
3021
- df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
3022
- groupby_column_list=groupby_column_list)
3372
+ if columns:
3373
+ df_utils._validate_describe_columns(columns=columns, metaexpr=self._metaexpr,
3374
+ groupby_column_list=groupby_column_list)
3375
+ sort_cols = list(groupby_column_list)
3023
3376
 
3024
- if isinstance(self, DataFrameGroupByTime):
3025
- groupby_column_list = self.groupby_column_list
3026
- df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
3027
- groupby_column_list=groupby_column_list)
3377
+ # 'func' column will be always there in result.
3378
+ sort_cols.append(function_label)
3028
3379
 
3380
+ # Handle DataFrameGroupByTime using union all approach and
3381
+ # other DataFrames using TD_UnivariateStatistics approach.
3382
+ if isinstance(self, DataFrameGroupByTime):
3029
3383
  # Construct the aggregate query.
3030
3384
  agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
3031
3385
  percentiles=percentiles, function_label=function_label,
@@ -3037,29 +3391,99 @@ class DataFrame():
3037
3391
  timecode_column=self._timecode_column,
3038
3392
  sequence_column=self._sequence_column,
3039
3393
  fill=self._fill)
3394
+
3395
+ if groupby_column_list is not None:
3396
+ df = DataFrame.from_query(agg_query, index_label=sort_cols)
3397
+ df2 = df.sort(sort_cols)
3398
+ df2._metaexpr._n_rows = 100
3399
+ describe_df = df2
3400
+ else:
3401
+ describe_df = DataFrame.from_query(agg_query, index_label=function_label)
3402
+
3403
+ # Check if numeric overflow can occur for result DataFrame.
3404
+ if self._check_numeric_overflow(describe_df):
3405
+ result_df = self._promote_dataframe_types()
3406
+ describe_df = result_df.describe(pivot=True)
3407
+ return describe_df
3408
+
3040
3409
  else:
3041
- # Construct the aggregate query.
3042
- agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
3043
- percentiles=percentiles, function_label=function_label,
3044
- groupby_column_list=groupby_column_list, include=include,
3045
- is_time_series_aggregate=False, verbose=verbose,
3046
- distinct=distinct, statistics=statistics)
3047
-
3048
- if groupby_column_list is not None:
3049
- sort_cols = [i for i in groupby_column_list]
3050
- sort_cols.append(function_label)
3051
- df = DataFrame.from_query(agg_query, index_label=sort_cols)
3052
- df2 = df.sort(sort_cols)
3053
- df2._metaexpr._n_rows = 100
3054
- describe_df = df2
3055
- else:
3056
- describe_df = DataFrame.from_query(agg_query, index_label=function_label)
3410
+ # If pivot is True, then construct the aggregate query and return the result DataFrame.
3411
+ # Otherwise, return the result DataFrame in the regular aggregate mode using UnivariateStatistics.
3412
+
3413
+ if pivot:
3414
+ # Construct the aggregate query.
3415
+ agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
3416
+ percentiles=percentiles, function_label=function_label,
3417
+ groupby_column_list=groupby_column_list, include=include,
3418
+ is_time_series_aggregate=False, verbose=verbose,
3419
+ distinct=distinct, statistics=statistics)
3420
+
3421
+ if groupby_column_list is not None:
3422
+ sort_cols = [i for i in groupby_column_list]
3423
+ sort_cols.append(function_label)
3424
+ df = DataFrame.from_query(agg_query, index_label=sort_cols)
3425
+ df2 = df.sort(sort_cols)
3426
+ df2._metaexpr._n_rows = 100
3427
+ describe_df = df2
3428
+ else:
3429
+ describe_df = DataFrame.from_query(agg_query, index_label=function_label)
3430
+
3431
+ # Check if numeric overflow can occur for result DataFrame.
3432
+ if self._check_numeric_overflow(describe_df):
3433
+ result_df = self._promote_dataframe_types()
3434
+ describe_df = result_df.describe(pivot=True)
3435
+
3436
+ return describe_df
3437
+
3438
+ # If columns is None, then all dataframe columns are considered.
3439
+ if columns is None:
3440
+ columns = self.columns
3441
+ # Exclude groupby columns
3442
+ if groupby_column_list is not None:
3443
+ columns = [col for col in columns if col not in groupby_column_list]
3444
+
3445
+ numeric_cols = []
3446
+
3447
+ # Extract numeric columns and their types of all columns
3448
+ for col in self._metaexpr.c:
3449
+ if type(col.type) in UtilFuncs()._get_numeric_datatypes() and \
3450
+ col.name in columns:
3451
+ numeric_cols.append(col.name)
3452
+
3453
+ if numeric_cols:
3454
+ # Default statistics for 'Regular Aggregate Mode'
3455
+ sql_stat = ["COUNT", "MAXIMUM", "MEAN", "MINIMUM", "PERCENTILES", "STANDARD DEVIATION"]
3456
+
3457
+ if statistics is not None:
3458
+ py_to_sql_func_map = {"count": "COUNT",
3459
+ "max": "MAXIMUM",
3460
+ "mean": "MEAN",
3461
+ "unique": 'UNIQUE ENTITY COUNT',
3462
+ "min": "MINIMUM",
3463
+ "percentile": "PERCENTILES",
3464
+ "std": "STANDARD DEVIATION"}
3465
+ # Convert statistics into corresponding SQL function names
3466
+ sql_stat = [py_to_sql_func_map[stat] for stat in UtilFuncs()._as_list(statistics)]
3467
+
3468
+ # Convert percentiles to centiles for univariate statistics
3469
+ centiles = list(map(lambda n: int(n * 100), percentiles))
3470
+
3471
+ # UnivariateStatistics parameters
3472
+ univar_param = {
3473
+ "newdata": self.select(self.columns),
3474
+ "target_columns": numeric_cols,
3475
+ "partition_columns": groupby_column_list,
3476
+ "centiles": centiles,
3477
+ "stats": sql_stat
3478
+ }
3479
+
3480
+ from teradataml import UnivariateStatistics
3481
+ # Run UnivariateStatistics
3482
+ aggr_df = UnivariateStatistics(**univar_param).result
3483
+
3484
+ # Return the result in teradataml format
3485
+ return aggr_df
3057
3486
 
3058
- # Check if numeric overflow can occur for result DataFrame.
3059
- if self._check_numeric_overflow(describe_df):
3060
- result_df = self._promote_dataframe_types()
3061
- describe_df = result_df.describe()
3062
- return describe_df
3063
3487
  except TeradataMlException:
3064
3488
  raise
3065
3489
  except Exception as err:
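In the non-pivot path added above, describe() now delegates the regular aggregate mode to the TD_UnivariateStatistics SQLE function instead of building a UNION ALL query. A hedged sketch of the roughly equivalent direct call, reusing the parameter names from the univar_param dictionary in this hunk:

>>> from teradataml import UnivariateStatistics
>>> us = UnivariateStatistics(newdata=df.select(df.columns),
...                           target_columns=["Feb", "Jan", "Mar", "Apr"],
...                           centiles=[25, 50, 75],
...                           stats=["COUNT", "MAXIMUM", "MEAN", "MINIMUM",
...                                  "PERCENTILES", "STANDARD DEVIATION"])
>>> us.result   # long-format statistics, similar to the default df.describe() output shown earlier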
@@ -5555,7 +5979,7 @@ class DataFrame():
5555
5979
  try:
5556
5980
  # Printing the DF will actually run underlying select query and
5557
5981
  # will brought up numeric overflow if any. Only materializing won't work.
5558
- print(result_df)
5982
+ repr(result_df)
5559
5983
  return False
5560
5984
  except TeradataMlException as tme:
5561
5985
  if "Numeric overflow occurred during computation" in str(tme):
@@ -5642,7 +6066,35 @@ class DataFrame():
5642
6066
 
5643
6067
  def _repr_html_(self):
5644
6068
  """ Print method for teradataml for iPython rich display. """
6069
+ self._generate_output_html()
6070
+ if display.enable_ui:
6071
+ # EDA Ui widget representation using teradatamlwidgets
6072
+ if self._eda_ui is None:
6073
+ from teradatamlwidgets.eda.Ui import Ui
6074
+ self._eda_ui = Ui(df=self, html=self.html)
6075
+ else:
6076
+ self._eda_ui.display_ui()
6077
+ return self.html
6078
+
6079
+ def get_eda_ui(self):
6080
+ """
6081
+ Returns the EDA representation UI.
6082
+
6083
+ PARAMETERS:
6084
+ None.
6085
+
6086
+ EXCEPTIONS:
6087
+ None.
6088
+
6089
+ RETURNS:
6090
+ teradatamlwidgets.eda.Ui
6091
+
6092
+ EXAMPLE:
6093
+ ui = df.get_eda_ui()
6094
+ """
6095
+ return self._eda_ui
5645
6096
 
6097
+ def _generate_output_html(self, disable_types=True):
5646
6098
  # Check if class attributes __data and __data_columns are not None.
5647
6099
  # If not None, reuse the data and columns.
5648
6100
  # If None, generate latest results.
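The _repr_html_ rework above hooks the notebook display into the optional teradatamlwidgets EDA UI when display.enable_ui is set. A sketch of the intended flow; the enable_ui toggle and the widget behaviour are read off the code above and depend on the separate teradatamlwidgets package being installed:

>>> from teradataml.options.display import display
>>> display.enable_ui = True      # assumed flag, as checked by _repr_html_
>>> df                            # in Jupyter, renders the EDA widget and the HTML table
>>> ui = df.get_eda_ui()          # handle to the widget
>>> result = df.get_output(0)     # DataFrame produced from the Analyze tab, if any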
@@ -5655,17 +6107,25 @@ class DataFrame():
5655
6107
  dindent = indent + indent
5656
6108
 
5657
6109
  header_html = ['<style type="text/css">',
5658
- 'table {border:ridge 5px;}',
6110
+ 'table { border:ridge 5px}',
5659
6111
  'table td {border:inset 1px;}',
5660
- 'table tr#HeaderRow {background-color:grey; color:white;}'
6112
+ 'table tr#HeaderRow {background-color:grey; color:white;}',
5661
6113
  '</style>\n'
5662
6114
  ]
5663
6115
  html = "\n{0}".format(indent).join(header_html)
5664
- html += '<html><table>\n{0}<tr id="HeaderRow">\n'.format(indent)
6116
+ html += '<html><table style="min-width:1000px;">\n{0}<tr id="HeaderRow">\n'.format(indent)
5665
6117
 
5666
- columns_html = "</th>\n{0}<th>".format(dindent).join(self.__data_columns)
5667
- html += "{0}<th>{1}</th>\n".format(dindent, columns_html)
5668
- html += "{0}</tr>\n".format(indent)
6118
+ columns_html = "</th><th>".join(self.__data_columns)
6119
+ html += "<th>{0}</th>\n".format(columns_html)
6120
+ html += "</tr>\n"
6121
+
6122
+ if not disable_types:
6123
+ html += '<tr>\n'.format(indent)
6124
+ col_types = [repr(self._td_column_names_and_sqlalchemy_types[column]) for column in
6125
+ self.__data_columns]
6126
+ columns_types_html = "</td>\n{0}<td>".format(dindent).join(col_types)
6127
+ html += "{0}<td>{1}</td>\n".format(dindent, columns_types_html)
6128
+ html += "{0}</tr>\n".format(indent)
5669
6129
 
5670
6130
  for row in self.__data:
5671
6131
  row_html = ["{0}<td>{1}</td>\n".format(dindent,
@@ -5673,8 +6133,31 @@ class DataFrame():
5673
6133
  html += "{1}<tr>\n{0}{1}</tr>\n".format("".join(row_html), indent)
5674
6134
 
5675
6135
  html += "</table></html>"
6136
+ self.html = html
6137
+
6138
+ def get_output(self, output_index=0):
6139
+ """
6140
+ DESCRIPTION:
6141
+ Returns the result of analytic function when analytic function is
6142
+ run from 'Analyze' tab in EDA UI.
6143
+ Note:
6144
+ * The function does not return anything if analytic function is
6145
+ not run from EDA UI.
5676
6146
 
5677
- return html
6147
+ PARAMETERS:
6148
+ output_index:
6149
+ Optional Argument.
6150
+ Specifies the index of the output dataframe to be returned.
6151
+ Default Value: 0
6152
+ Types: int
6153
+
6154
+ RAISES:
6155
+ IndexError
6156
+
6157
+ RETURNS:
6158
+ teradataml DataFrame object.
6159
+ """
6160
+ return self._eda_ui.get_output_dataframe(output_index=output_index)
5678
6161
 
5679
6162
  def __get_data_columns(self):
5680
6163
  """
@@ -6019,6 +6502,8 @@ class DataFrame():
6019
6502
  * "open_sessions" specifies the number of Teradata data transfer
6020
6503
  sessions to be opened for fastexport. This argument is only applicable
6021
6504
  in fastexport mode.
6505
+ * Function returns the pandas DataFrame with Decimal column types as float instead of object.
6506
+ If you want the datatype to be object, set the argument "coerce_float" to False.
6022
6507
 
6023
6508
  Notes:
6024
6509
  1. For additional information about "coerce_float" and
@@ -6334,15 +6819,22 @@ class DataFrame():
6334
6819
  Supported join operators are =, ==, <, <=, >, >=, <> and != (= and <> operators are
6335
6820
  not supported when using DataFrame columns as operands).
6336
6821
 
6337
- Note:
6338
- 1. When multiple join conditions are given, they are joined using AND boolean
6339
- operator. Other boolean operators are not supported.
6340
- 2. Nesting of join on conditions in column expressions using & and | is not
6341
- supported. The example for unsupported nested join on conditions is:
6342
- on = [(df1.a == df1.b) & (df1.c == df1.d)]
6822
+ Notes:
6823
+ 1. When multiple join conditions are given as a list string/ColumnExpression,
6824
+ they are joined using AND operator.
6825
+ 2. Two or more on conditions can be combined using & and | operators
6826
+ and can be passed as single ColumnExpression.
6827
+ You can use (df1.a == df1.b) & (df1.c == df1.d) in place of
6828
+ [df1.a == df1.b, df1.c == df1.d].
6829
+ 3. Two or more on conditions can not be combined using pythonic 'and'
6830
+ and 'or'.
6831
+ You can use (df1.a == df1.b) & (df1.c == df1.d) in place of
6832
+ [df1.a == df1.b and df1.c == df1.d].
6833
+ 4. Performing self join using same DataFrame object in 'other'
6834
+ argument is not supported. In order to perform self join,
6835
+ first create aliased DataFrame using alias() API and pass it
6836
+ for 'other' argument. Refer to Example 10 in EXAMPLES section.
6343
6837
 
6344
- You can use [df1.a == df1.b, df1.c == df1.d] in place of
6345
- [(df1.a == df1.b) & (df1.c == df1.d)].
6346
6838
 
6347
6839
  PARAMETERS:
6348
6840
 
@@ -6370,15 +6862,20 @@ class DataFrame():
6370
6862
  is the column of left dataframe df1 and col2 is the column of right
6371
6863
  dataframe df2.
6372
6864
  Examples:
6373
- 1. [df1.a == df2.a, df1.b == df2.b] indicates df1.a = df2.a and df1.b = df2.b.
6374
- 2. [df1.a == df2.b, df1.c == df2.d] indicates df1.a = df2.b and df1.c = df2.d.
6375
- 3. [df1.a <= df2.b and df1.c > df2.d] indicates df1.a <= df2.b and df1.c > df2.d.
6376
- 4. [df1.a < df2.b and df1.c >= df2.d] indicates df1.a < df2.b and df1.c >= df2.d.
6865
+ 1. [df1.a == df2.a, df1.b == df2.b] indicates df1.a = df2.a AND df1.b = df2.b.
6866
+ 2. [df1.a == df2.b, df1.c == df2.d] indicates df1.a = df2.b AND df1.c = df2.d.
6867
+ 3. [df1.a <= df2.b & df1.c > df2.d] indicates df1.a <= df2.b AND df1.c > df2.d.
6868
+ 4. [df1.a < df2.b | df1.c >= df2.d] indicates df1.a < df2.b OR df1.c >= df2.d.
6377
6869
  5. df1.a != df2.b indicates df1.a != df2.b.
6378
6870
  • The combination of both string comparisons and comparisons as column expressions.
6379
6871
  Examples:
6380
- 1. ["a", df1.b == df2.b] indicates df1.a = df2.a and df1.b = df2.b.
6381
- 2. [df1.a <= df2.b, "c > d"] indicates df1.a <= df2.b and df1.c > df2.d.
6872
+ 1. ["a", df1.b == df2.b] indicates df1.a = df2.a AND df1.b = df2.b.
6873
+ 2. [df1.a <= df2.b, "c > d"] indicates df1.a <= df2.b AND df1.c > df2.d.
6874
+ • ColumnExpressions containing FunctionExpressions which represent SQL functions
6875
+ invoked on DataFrame Columns.
6876
+ Examples:
6877
+ 1. (df1.a.round(1) - df2.a.round(1)).mod(2.5) > 2
6878
+ 2. df1.a.floor() - df2.b.floor() > 2
6382
6879
 
6383
6880
  Types: str (or) ColumnExpression (or) List of strings(str) or ColumnExpressions
6384
6881
 
@@ -6400,7 +6897,7 @@ class DataFrame():
6400
6897
  Specifies the suffix to be added to the right table columns.
6401
6898
  Default Value: None.
6402
6899
  Types: str
6403
-
6900
+
6404
6901
  lprefix:
6405
6902
  Optional Argument.
6406
6903
  Specifies the prefix to be added to the left table columns.
@@ -6450,7 +6947,7 @@ class DataFrame():
6450
6947
  0 2 2 analytics 2.3 2.3 b analytics b
6451
6948
  1 1 1 teradata 1.3 1.3 a teradata a
6452
6949
 
6453
- # Example 2: One "on" argument condition is ColumnExpression and other is string having two
6950
+ # Example 2: One "on" argument condition is ColumnExpression and other is string having two
6454
6951
  # columns with left outer join.
6455
6952
  >>> df1.join(df2, on = [df1.col2 == df2.col4,"col5 = col7"], how = "left", lprefix = "t1", rprefix = "t2")
6456
6953
  t1_col1 t2_col1 col2 t1_col3 t2_col3 col5 col4 col7
@@ -6464,7 +6961,7 @@ class DataFrame():
6464
6961
  0 2 2 analytics 2.3 2.3 b analytics b
6465
6962
  1 1 1 teradata 1.3 1.3 a teradata a
6466
6963
 
6467
- # Example 4: One "on" argument condition is ColumnExpression and other is string having two
6964
+ # Example 4: One "on" argument condition is ColumnExpression and other is string having two
6468
6965
  # columns with full join.
6469
6966
  >>> df1.join(other = df2, on = ["col2=col4",df1.col5 == df2.col7], how = "full", lprefix = "t1", rprefix = "t2")
6470
6967
  t1_col1 t2_col1 col2 t1_col3 t2_col3 col5 col4 col7
@@ -6542,7 +7039,53 @@ class DataFrame():
6542
7039
  3 Beginner Beginner 1 3.95 Beginner 3.70 Novice 0 1 no yes
6543
7040
  3 Beginner Beginner 2 3.76 Beginner 3.70 Novice 0 1 no yes
6544
7041
  3 Beginner Novice 3 3.70 Beginner 3.70 Novice 1 1 no no
7042
+
7043
+ # Example 10: Perform self join using aliased DataFrame.
7044
+ # Create an aliased DataFrame.
7045
+ >>> lhs = DataFrame("admissions_train").head(3).sort("id")
7046
+ >>> rhs = lhs.alias("rhs")
7047
+ # Use aliased DataFrame for self join.
7048
+ >>> joined_df = lhs.join(other=rhs, how="cross", lprefix="l", rprefix="r")
7049
+ >>> joined_df
7050
+ l_id r_id l_masters r_masters l_gpa r_gpa l_stats r_stats l_programming r_programming l_admitted r_admitted
7051
+ 0 1 3 yes no 3.95 3.70 Beginner Novice Beginner Beginner 0 1
7052
+ 1 2 2 yes yes 3.76 3.76 Beginner Beginner Beginner Beginner 0 0
7053
+ 2 2 3 yes no 3.76 3.70 Beginner Novice Beginner Beginner 0 1
7054
+ 3 3 1 no yes 3.70 3.95 Novice Beginner Beginner Beginner 1 0
7055
+ 4 3 3 no no 3.70 3.70 Novice Novice Beginner Beginner 1 1
7056
+ 5 3 2 no yes 3.70 3.76 Novice Beginner Beginner Beginner 1 0
7057
+ 6 2 1 yes yes 3.76 3.95 Beginner Beginner Beginner Beginner 0 0
7058
+ 7 1 2 yes yes 3.95 3.76 Beginner Beginner Beginner Beginner 0 0
7059
+ 8 1 1 yes yes 3.95 3.95 Beginner Beginner Beginner Beginner 0 0
7060
+
7061
+ # Example 11: Perform join with compound 'on' condition having
7062
+ # more than one binary operator.
7063
+ >>> rhs_2 = lhs.assign(double_gpa=lhs.gpa * 2)
7064
+ >>> joined_df_2 = lhs.join(rhs_2, on=rhs_2.double_gpa == lhs.gpa * 2, how="left", lprefix="l", rprefix="r")
7065
+ >>> joined_df_2
7066
+ l_id r_id l_masters r_masters l_gpa r_gpa l_stats r_stats l_programming r_programming l_admitted r_admitted double_gpa
7067
+ 0 3 3 no no 3.70 3.70 Novice Novice Beginner Beginner 1 1 7.40
7068
+ 1 2 2 yes yes 3.76 3.76 Beginner Beginner Beginner Beginner 0 0 7.52
7069
+ 2 1 1 yes yes 3.95 3.95 Beginner Beginner Beginner Beginner 0 0 7.90
7070
+
7071
+ # Example 12: Perform join on DataFrames with 'on' condition
7072
+ # having FunctionExpression.
7073
+ >>> df = DataFrame("admissions_train")
7074
+ >>> df2 = df.alias("rhs_df")
7075
+ >>> joined_df_3 = df.join(df2, on=(df.gpa.round(1) - df2.gpa.round(1)).mod(2.5) > 2,
7076
+            ...                        how="inner", lprefix="l")
7077
+ >>> joined_df_3.sort(["id", "l_id"])
7078
+ l_id id l_masters masters l_gpa gpa l_stats stats l_programming programming l_admitted admitted
7079
+ 0 1 24 yes no 3.95 1.87 Beginner Advanced Beginner Novice 0 1
7080
+ 1 13 24 no no 4.0 1.87 Advanced Advanced Novice Novice 1 1
7081
+ 2 15 24 yes no 4.0 1.87 Advanced Advanced Advanced Novice 1 1
7082
+ 3 25 24 no no 3.96 1.87 Advanced Advanced Advanced Novice 1 1
7083
+ 4 27 24 yes no 3.96 1.87 Advanced Advanced Advanced Novice 0 1
7084
+ 5 29 24 yes no 4.0 1.87 Novice Advanced Beginner Novice 0 1
7085
+ 6 40 24 yes no 3.95 1.87 Novice Advanced Beginner Novice 0 1
7086
+
6545
7087
  """
7088
+
6546
7089
  # Argument validations
6547
7090
  awu_matrix = []
6548
7091
  awu_matrix.append(["other", other, False, (DataFrame)])
@@ -6556,17 +7099,11 @@ class DataFrame():
6556
7099
  # Validate argument types
6557
7100
  _Validators._validate_function_arguments(awu_matrix)
6558
7101
 
6559
- # If user has not provided suffix argument(s), then prefix argument(s) value(s) are passed by
6560
- # user hence we will set the affix variables (laffix and raffix) with provided value(s).
6561
- # affix_type is also set appropriately.
6562
- if lsuffix is not None or rsuffix is not None:
6563
- laffix = lsuffix
6564
- raffix = rsuffix
6565
- affix_type = "suffix"
6566
- else:
6567
- laffix = lprefix
6568
- raffix = rprefix
6569
- affix_type = "prefix"
7102
+ # If self and other DataFrames are pointing to same Table object,
7103
+ # raise error.
7104
+ if self._metaexpr.t is other._metaexpr.t:
7105
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_ALIAS_REQUIRED, "join"),
7106
+ MessageCodes.TDMLDF_ALIAS_REQUIRED)
6570
7107
 
6571
7108
  how_lc = how.lower()
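A minimal standalone sketch of why the identity check above demands an alias for self joins, using hypothetical stand-in classes rather than the teradataml types: an aliased DataFrame wraps a distinct table object, so the `is` comparison no longer matches.

    class _Table:
        pass

    class _Frame:
        def __init__(self, table):
            self.table = table
        def alias(self):
            # An alias exposes the same data through a new table object.
            return _Frame(_Table())

    lhs = _Frame(_Table())
    same = lhs                     # no alias: both sides share one table object
    aliased = lhs.alias()          # alias: a distinct table object

    assert lhs.table is same.table          # the condition under which the real join raises TDMLDF_ALIAS_REQUIRED
    assert lhs.table is not aliased.table   # the condition under which the join proceeds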
6572
7109
 
@@ -6584,12 +7121,33 @@ class DataFrame():
6584
7121
  for col in other.columns:
6585
7122
  other_columns_lower_actual_map[col.lower()] = col
6586
7123
 
6587
- for column in self_columns_lower_actual_map.keys():
6588
- if column in other_columns_lower_actual_map.keys():
6589
- if laffix is None and raffix is None:
6590
- raise TeradataMlException(
6591
- Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),
6592
- MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
7124
+ # Set the affix variables (laffix and raffix) with provided value(s)
7125
+ # of lsuffix, rsuffix, lprefix and rprefix.
7126
+ # Also set affix_type appropriately.
7127
+ laffix = None
7128
+ raffix = None
7129
+ affix_type = None
7130
+ if lsuffix is not None or rsuffix is not None:
7131
+ laffix = lsuffix
7132
+ raffix = rsuffix
7133
+ affix_type = "suffix"
7134
+ elif lprefix is not None or rprefix is not None:
7135
+ laffix = lprefix
7136
+ raffix = rprefix
7137
+ affix_type = "prefix"
7138
+
7139
+ # Same column names can be present in two dataframes involved
7140
+        # in a join operation in the following two cases:
7141
+ # Case 1: Self join.
7142
+ # Case 2: Two tables having common column names.
7143
+ # In any case, at least one kind of affix is required to generate
7144
+ # distinct column names in resultant table. Throw error if no affix
7145
+ # is available.
7146
+ if not set(self_columns_lower_actual_map.keys()).isdisjoint(other_columns_lower_actual_map.keys()):
7147
+ if affix_type is None:
7148
+ raise TeradataMlException(
7149
+ Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),
7150
+ MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
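A hedged standalone sketch of the affix precedence applied above: suffix arguments win over prefix arguments when both are supplied, and no affix at all leaves overlapping names unresolved, which is what triggers the error. The helper name is illustrative only.

    def pick_affix(lsuffix=None, rsuffix=None, lprefix=None, rprefix=None):
        # Mirrors the precedence above: suffix arguments are considered first.
        if lsuffix is not None or rsuffix is not None:
            return lsuffix, rsuffix, "suffix"
        if lprefix is not None or rprefix is not None:
            return lprefix, rprefix, "prefix"
        return None, None, None

    assert pick_affix(lprefix="t1", rprefix="t2") == ("t1", "t2", "prefix")
    assert pick_affix(lsuffix="l", lprefix="t1") == ("l", None, "suffix")
    assert pick_affix() == (None, None, None)   # with overlapping columns, this is the case that raises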
6593
7151
 
6594
7152
  # Both affixes should not be equal to perform join.
6595
7153
  if laffix == raffix and laffix is not None:
@@ -6598,115 +7156,159 @@ class DataFrame():
6598
7156
  "'l{affix_type}' and 'r{affix_type}'".format(affix_type=affix_type)),
6599
7157
  MessageCodes.TDMLDF_INVALID_TABLE_ALIAS)
6600
7158
 
6601
- if how_lc != "cross":
6602
- if isinstance(on, str) or isinstance(on, ColumnExpression):
6603
- on = [on]
6604
-
6605
- all_join_conditions = []
6606
- invalid_join_conditions = []
6607
- # Forming join condition
6608
- for condition in on:
6609
- ori_condition = condition
6610
-
6611
- if not isinstance(condition, (ColumnExpression, str)):
6612
- invalid_join_conditions.append(condition)
6613
- continue
6614
-
6615
- # Process only when the on condition is string or a ColumnExpression
6616
- if isinstance(condition, ColumnExpression):
6617
- columns = condition.original_column_expr
6618
- condition = condition.compile()
6619
-
6620
- for op in TeradataConstants.TERADATA_JOIN_OPERATORS.value:
6621
- if op in condition:
6622
- conditional_separator = op
6623
- break
6624
- else:
6625
- # If no join condition is mentioned, default is taken as equal.
6626
- # If on is ['a'], then it is equal to 'df1.a = df2.a'
6627
- columns = [condition, condition]
6628
- condition = "{0} = {0}".format(condition)
6629
- conditional_separator = "="
6630
-
6631
- if isinstance(ori_condition, str):
6632
- columns = [column.strip() for column in condition.split(sep=conditional_separator)
6633
- if len(column) > 0]
6634
-
6635
- if len(columns) != 2:
6636
- invalid_join_conditions.append(condition)
6637
- else:
6638
- left_col = self.__add_alias_to_column(columns[0], self, laffix if laffix is not None else "df1")
6639
- right_col = self.__add_alias_to_column(columns[1], other, raffix if raffix is not None else "df2")
6640
- if conditional_separator == "!=":
6641
- # "!=" is python way of expressing 'not equal to'. "<>" is Teradata way of
6642
- # expressing 'not equal to'. Adding support for "!=".
6643
- conditional_separator = "<>"
6644
- all_join_conditions.append('{0} {1} {2}'.format(left_col, conditional_separator, right_col))
6645
-
6646
- if len(invalid_join_conditions) > 0:
6647
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_JOIN_CONDITION,
6648
- ", ".join(invalid_join_conditions)),
6649
- MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
6650
-
6651
- join_condition = " and ".join(all_join_conditions)
6652
- else:
6653
- join_condition = ""
6654
-
6655
- df1_columns_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr)
6656
- df2_columns_types = df_utils._get_required_columns_types_from_metaexpr(other._metaexpr)
6657
-
6658
- select_columns = []
6659
- new_metaexpr_columns_types = OrderedDict()
6660
-
6661
- for column in self.columns:
6662
- if df_utils._check_column_exists(column.lower(), other_columns_lower_actual_map.keys()):
6663
- # Check if column found in other DataFrame has same case or different.
6664
- # Return the column name from the other DataFrame.
6665
- other_column = other_columns_lower_actual_map[column.lower()]
6666
-
6667
- df1_column_with_affix = self.__check_and_return_new_column_name(laffix, other_column,
6668
- other_columns_lower_actual_map.keys(),
6669
- "right", affix_type)
6670
- select_columns.append("{0} as {1}".format(
6671
- self.__get_fully_qualified_col_name(other_column, "df1" if laffix is None else laffix),
6672
- df1_column_with_affix))
6673
-
6674
- df2_column_with_affix = self.__check_and_return_new_column_name(raffix, column,
6675
- self_columns_lower_actual_map.keys(),
6676
- "left", affix_type)
6677
- select_columns.append("{0} as {1}".format(
6678
- self.__get_fully_qualified_col_name(column, "df2" if raffix is None else raffix),
6679
- df2_column_with_affix))
6680
-
6681
- # As we are creating new column name, adding it to new metadata dict for new dataframe from join.
6682
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
6683
- UtilFuncs._teradata_unquote_arg(df1_column_with_affix, "\""),
6684
- column, df1_columns_types)
6685
-
6686
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
6687
- UtilFuncs._teradata_unquote_arg(df2_column_with_affix, "\""),
6688
- other_column, df2_columns_types)
6689
-
7159
+ try:
7160
+ # Set an attribute named '_join_alias' to underlying SQLAlchemy table objects
7161
+ # and use it as default alias for compiling.
7162
+ setattr(self._metaexpr.t, "_join_alias", "lhs")
7163
+ setattr(other._metaexpr.t, "_join_alias", "rhs")
7164
+ lhs_alias = "lhs"
7165
+ rhs_alias = "rhs"
7166
+
7167
+ # Step 1: Generate the on clause string.
7168
+ if how_lc != "cross":
7169
+ on = UtilFuncs._as_list(on)
7170
+
7171
+ all_join_conditions = []
7172
+ invalid_join_conditions = []
7173
+ # Forming join condition
7174
+ for condition in on:
7175
+ # Process only when the on condition is either a string or a ColumnExpression.
7176
+ if not isinstance(condition, (ColumnExpression, str)):
7177
+ invalid_join_conditions.append(condition)
7178
+ continue
7179
+
7180
+ # Generate final on clause string from string representation of condition.
7181
+ if isinstance(condition, str):
7182
+ # Process the string manually.
7183
+ # 1. Parse the string to get operator.
7184
+ for op in TeradataConstants.TERADATA_JOIN_OPERATORS.value:
7185
+ if op in condition:
7186
+ conditional_separator = op
7187
+ break
7188
+ else:
7189
+ # If no join condition is mentioned, then string represents the column.
7190
+ # In this case, default operator is taken as equal.
7191
+ # If on is ['a'], then it is equal to 'lhs.a = rhs.a'
7192
+ columns = [condition, condition]
7193
+ condition = "{0} = {0}".format(condition)
7194
+ conditional_separator = "="
7195
+ # 2. Split the string using operator and extract LHS and RHS
7196
+ # columns from a binary expression.
7197
+ columns = [column.strip() for column in condition.split(sep=conditional_separator)
7198
+ if len(column) > 0]
7199
+
7200
+ if len(columns) != 2:
7201
+ invalid_join_conditions.append(condition)
7202
+                            # TODO: Raise the exception right here.
7203
+ else:
7204
+ # 3. Generate fully qualified names using affix and table alias
7205
+ # and create final on clause condition string.
7206
+ left_col = self.__add_alias_to_column(columns[0], self, lhs_alias)
7207
+ right_col = self.__add_alias_to_column(columns[1], other, rhs_alias)
7208
+ if conditional_separator == "!=":
7209
+ # "!=" is python way of expressing 'not equal to'. "<>" is Teradata way of
7210
+ # expressing 'not equal to'. Adding support for "!=".
7211
+ conditional_separator = "<>"
7212
+ all_join_conditions.append(
7213
+ '{0} {1} {2}'.format(left_col, conditional_separator, right_col))
7214
+
7215
+ # Generate on clause string from column expression.
7216
+ if isinstance(condition, ColumnExpression):
7217
+ compiled_condition = condition.compile(compile_kwargs={'include_table': True,
7218
+ 'literal_binds': True,
7219
+ 'table_name_kind': '_join_alias',
7220
+ 'compile_with_caller_table': True,
7221
+ 'table_only': True})
7222
+
7223
+ all_join_conditions.append(compiled_condition)
7224
+
7225
+ # Raise error if invalid on conditions are passed.
7226
+ if len(invalid_join_conditions) > 0:
7227
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_JOIN_CONDITION,
7228
+ ", ".join(invalid_join_conditions)),
7229
+ MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
7230
+
7231
+ # Generate final on condition.
7232
+ join_condition = " and ".join(all_join_conditions)
6690
7233
  else:
6691
- # As column not present in right DataFrame, directly adding column to new metadata dict.
6692
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, df1_columns_types)
6693
- select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
7234
+            # A cross join does not need an on condition.
7235
+ join_condition = ""
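A simplified standalone sketch of the string handling in Step 1 above. The operator list is abridged, and the qualification is hard-coded; the real code pulls the operators from TeradataConstants.TERADATA_JOIN_OPERATORS and qualifies names through __add_alias_to_column with the affixes.

    OPERATORS = ("<=", ">=", "<>", "!=", "=", "<", ">")

    def parse_on(condition, lhs="lhs", rhs="rhs"):
        for op in OPERATORS:
            if op in condition:
                sep = op
                break
        else:
            # A bare column name means an equi-join on that column.
            condition, sep = "{0} = {0}".format(condition), "="
        left, right = [c.strip() for c in condition.split(sep) if len(c) > 0]
        sep = "<>" if sep == "!=" else sep     # Python '!=' becomes Teradata '<>'
        return "{0}.{1} {2} {3}.{4}".format(lhs, left, sep, rhs, right)

    assert parse_on("col2=col4") == "lhs.col2 = rhs.col4"
    assert parse_on("col5") == "lhs.col5 = rhs.col5"
    assert parse_on("a != b") == "lhs.a <> rhs.b"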
6694
7236
 
6695
- for column in other.columns:
6696
- if not df_utils._check_column_exists(column.lower(), self_columns_lower_actual_map.keys()):
6697
- # As column not present in left DataFrame, directly adding column to new metadata dict.
6698
- self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, df2_columns_types)
6699
- select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
7237
+ # Step 2: Generate the select clause string.
7238
+ # Generate new column names for overlapping column names using lsuffix, rsuffix, lprefix, rprefix.
7239
+ # Also, use table alias while addressing overlapping column names.
7240
+ lhs_columns_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr)
7241
+ rhs_columns_types = df_utils._get_required_columns_types_from_metaexpr(other._metaexpr)
6700
7242
 
6701
- # Create a node in AED using _aed_join
6702
- join_node_id = self._aed_utils._aed_join(self._nodeid, other._nodeid, ", ".join(select_columns), how_lc,
6703
- join_condition, "df1" if laffix is None else laffix,
6704
- "df2" if raffix is None else raffix)
7243
+ select_columns = []
7244
+ new_metaexpr_columns_types = OrderedDict()
6705
7245
 
6706
- # Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid and underlying table name.
6707
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(join_node_id, new_metaexpr_columns_types.items())
7246
+ # Processing columns in LHS DF/ self DF.
7247
+ for column in self.columns:
7248
+ if df_utils._check_column_exists(column.lower(), other_columns_lower_actual_map.keys()):
7249
+ # Check if column found in other DataFrame has same case or different.
7250
+ # Return the column name from the other DataFrame.
7251
+ other_column = other_columns_lower_actual_map[column.lower()]
7252
+
7253
+                    # Check whether the column name in the LHS DataFrame is the same as a column name in the RHS DataFrame.
7254
+ # If so, generate new name for LHS DF column using provided affix.
7255
+ df1_column_with_affix = self.__check_and_return_new_column_name(laffix, other_column,
7256
+ other_columns_lower_actual_map.keys(),
7257
+ "right", affix_type)
7258
+
7259
+ # Generate select clause string for current column and append to list.
7260
+ select_columns.append("{0} as {1}".format(
7261
+ self.__get_fully_qualified_col_name(other_column, lhs_alias),
7262
+ df1_column_with_affix))
7263
+
7264
+                    # Check whether the column name in the RHS DataFrame is the same as a column name in the LHS DataFrame.
7265
+ # If so, generate new name for RHS DF column using provided affix.
7266
+ df2_column_with_affix = self.__check_and_return_new_column_name(raffix, column,
7267
+ self_columns_lower_actual_map.keys(),
7268
+ "left", affix_type)
7269
+ # Generate select clause string for current column and append to list.
7270
+ select_columns.append("{0} as {1}".format(
7271
+ self.__get_fully_qualified_col_name(column, rhs_alias),
7272
+ df2_column_with_affix))
7273
+
7274
+ # As we are creating new column name, adding it to new metadata dict for new dataframe from join.
7275
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
7276
+ UtilFuncs._teradata_unquote_arg(df1_column_with_affix, "\""),
7277
+ column, lhs_columns_types)
7278
+
7279
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
7280
+ UtilFuncs._teradata_unquote_arg(df2_column_with_affix, "\""),
7281
+ other_column, rhs_columns_types)
6708
7282
 
6709
- return self._create_dataframe_from_node(join_node_id, new_metaexpr, self._index_label)
7283
+ else:
7284
+                    # As a column with the same name is not present in the RHS DataFrame,
7285
+ # directly adding column to new metadata dict.
7286
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, lhs_columns_types)
7287
+ select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
7288
+
7289
+ # Processing columns in RHS DF/ other DF.
7290
+ # Here we will only be processing columns which are not overlapping.
7291
+ for column in other.columns:
7292
+ if not df_utils._check_column_exists(column.lower(), self_columns_lower_actual_map.keys()):
7293
+ # As column not present in left DataFrame, directly adding column to new metadata dict.
7294
+ self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, rhs_columns_types)
7295
+ select_columns.append(UtilFuncs._teradata_quote_arg(column, "\"", False))
7296
+
7297
+            # Step 3: Create a node in AED using _aed_join with appropriate aliases for the involved tables.
7298
+ join_node_id = self._aed_utils._aed_join(self._nodeid, other._nodeid, ", ".join(select_columns),
7299
+ how_lc, join_condition, lhs_alias, rhs_alias)
7300
+
7301
+ # Step 4: Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid
7302
+ # and underlying table name.
7303
+ new_metaexpr = UtilFuncs._get_metaexpr_using_columns(join_node_id, new_metaexpr_columns_types.items())
7304
+
7305
+ # Return a new joined dataframe.
7306
+ return self._create_dataframe_from_node(join_node_id, new_metaexpr, self._index_label)
7307
+ finally:
7308
+ # Delete the '_join_alias' attribute attached to underlying
7309
+            # SQLAlchemy table objects.
7310
+ delattr(self._metaexpr.t, "_join_alias")
7311
+ delattr(other._metaexpr.t, "_join_alias")
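A minimal sketch of the tag-and-clean-up pattern used above, with hypothetical stand-in objects: the temporary attribute is attached for the duration of the join and always removed in the finally block, even if compilation fails.

    class _Tbl:
        pass

    lhs_t, rhs_t = _Tbl(), _Tbl()
    try:
        setattr(lhs_t, "_join_alias", "lhs")
        setattr(rhs_t, "_join_alias", "rhs")
        # ... compile the join SQL using the aliases here ...
    finally:
        delattr(lhs_t, "_join_alias")
        delattr(rhs_t, "_join_alias")

    assert not hasattr(lhs_t, "_join_alias") and not hasattr(rhs_t, "_join_alias")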
6710
7312
 
6711
7313
  def __add_alias_to_column(self, column, df, alias):
6712
7314
  """
@@ -6766,7 +7368,7 @@ class DataFrame():
6766
7368
  return "{0}.{1}".format(UtilFuncs._teradata_quote_arg(alias, "\"", False),
6767
7369
  UtilFuncs._teradata_quote_arg(column, "\"", False))
6768
7370
 
6769
- def __check_and_return_new_column_name(self, affix, column, col_list, df_side, affix_type):
7371
+ def __check_and_return_new_column_name(self, affix, column, col_list, other_df_side, affix_type):
6770
7372
  """
6771
7373
        Check whether the new column name (with affix) already exists in col_list; if it exists, throws an exception, else
6772
7374
  returns new column name.
@@ -6775,7 +7377,7 @@ class DataFrame():
6775
7377
  affix - affix to be added to column.
6776
7378
  column - column name.
6777
7379
            col_list - list of columns against which the new column name is checked for existence.
6778
- df_side - Side of the dataframe.
7380
+ other_df_side - Side on which the other dataframe in current join operation resides.
6779
7381
  affix_type - Type of affix. Either "prefix" or "suffix".
6780
7382
 
6781
7383
  EXAMPLES:
@@ -6789,19 +7391,19 @@ class DataFrame():
6789
7391
  return UtilFuncs._teradata_quote_arg(column, "\"", False)
6790
7392
 
6791
7393
  # If Prefix, affix is added before column name else it is appended.
6792
- df1_column_with_affix = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
6793
- df1_column_with_affix = df1_column_with_affix.format(affix,
6794
- UtilFuncs._teradata_unquote_arg(column, "\""))
6795
- if df_utils._check_column_exists(df1_column_with_affix.lower(), col_list):
6796
- if df_side == "right":
6797
- suffix_side = "l{}".format(affix_type)
7394
+ column_with_affix = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
7395
+ column_with_affix = column_with_affix.format(affix,
7396
+ UtilFuncs._teradata_unquote_arg(column, "\""))
7397
+ if df_utils._check_column_exists(column_with_affix.lower(), col_list):
7398
+ if other_df_side == "right":
7399
+ affix_type = "l{}".format(affix_type)
6798
7400
  else:
6799
- suffix_side = "r{}".format(affix_type)
7401
+ affix_type = "r{}".format(affix_type)
6800
7402
  raise TeradataMlException(
6801
- Messages.get_message(MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS, df1_column_with_affix, df_side,
6802
- suffix_side),
7403
+ Messages.get_message(MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS, column_with_affix, other_df_side,
7404
+ affix_type),
6803
7405
  MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS)
6804
- return UtilFuncs._teradata_quote_arg(df1_column_with_affix, "\"", False)
7406
+ return UtilFuncs._teradata_quote_arg(column_with_affix, "\"", False)
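A hedged standalone sketch of the renaming rule above. The helper below is hypothetical; the real method also quotes the result and raises TDMLDF_COLUMN_ALREADY_EXISTS rather than ValueError on a collision.

    def new_column_name(affix, column, existing, affix_type):
        if affix is None:
            return column
        pattern = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
        name = pattern.format(affix, column)
        if name.lower() in (c.lower() for c in existing):
            raise ValueError("column '{}' already exists".format(name))
        return name

    assert new_column_name("t1", "col1", ["col1", "col2"], "prefix") == "t1_col1"
    assert new_column_name("l", "gpa", ["gpa"], "suffix") == "gpa_l"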
6805
7407
 
6806
7408
  def __add_column_type_item_to_dict(self, new_metadata_dict, new_column, column, column_types):
6807
7409
  """
@@ -7327,21 +7929,17 @@ class DataFrame():
7327
7929
 
7328
7930
  exec_mode = 'REMOTE' if UtilFuncs._is_lake() else 'IN-DB'
7329
7931
  if exec_mode == 'REMOTE':
7330
- if _InternalBuffer.get("auth_token") is None:
7331
- raise TeradataMlException(Messages.get_message(
7332
- MessageCodes.FUNC_EXECUTION_FAILED, "'udf'", 'Authentication token is required to run udf. Set token using set_auth_token().'),
7333
- MessageCodes.FUNC_EXECUTION_FAILED)
7334
- else:
7335
- for colname, col in udf_expr.items():
7336
- env_name = UtilFuncs._get_env_name(col)
7337
- # Store the env_name and its corresponding output column
7338
- if env_name in env_mapper:
7339
- env_mapper[env_name].append(colname)
7340
- else:
7341
- env_mapper[env_name] = [colname]
7932
+ _Validators._check_auth_token("udf")
7933
+ for colname, col in udf_expr.items():
7934
+ env_name = UtilFuncs._get_env_name(col)
7935
+ # Store the env_name and its corresponding output column
7936
+ if env_name in env_mapper:
7937
+ env_mapper[env_name].append(colname)
7938
+ else:
7939
+ env_mapper[env_name] = [colname]
7342
7940
  else:
7343
7941
  env_mapper[env_name] = udf_expr.keys()
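A hedged standalone sketch of the env bucketing performed in the REMOTE branch above. The env names here are plain strings for illustration; the real code derives them from each ColumnExpression via UtilFuncs._get_env_name().

    udf_cols = {"upper_col": "env_a", "lower_col": "env_a", "len_col": "env_b"}

    env_mapper = {}
    for colname, env_name in udf_cols.items():
        if env_name in env_mapper:
            env_mapper[env_name].append(colname)
        else:
            env_mapper[env_name] = [colname]

    assert env_mapper == {"env_a": ["upper_col", "lower_col"], "env_b": ["len_col"]}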
7344
-
7942
+
7345
7943
  for env_name, cols in env_mapper.items():
7346
7944
  # Create a dictionary of output columns to column type.
7347
7945
  returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
@@ -7389,6 +7987,97 @@ class DataFrame():
7389
7987
  df = tbl_operators.execute()
7390
7988
  return df
7391
7989
 
7990
+ def _assign_call_udf(self, call_udf_expr):
7991
+ """
7992
+ DESCRIPTION:
7993
+ Internal function for DataFrame.assign() to execute the call_udf using
7994
+ Script/Apply Table Operator and create new column for teradataml DataFrame.
7995
+
7996
+ PARAMETER:
7997
+ call_udf_expr:
7998
+ Required Argument.
7999
+ Specifies a dictionary of column name to call_udf expressions.
8000
+ Types: dict
8001
+
8002
+ RETURNS:
8003
+ teradataml DataFrame
8004
+
8005
+ RAISES:
8006
+ None.
8007
+
8008
+ EXAMPLES:
8009
+ # call_udf_expr is a dictionary of column names to call_udf expressions.
8010
+ call_udf_expr = {'upper_col': <teradataml.dataframe.sql._SQLColumnExpression object at 0x0000028E59C44310>,
8011
+ 'sum_col': <teradataml.dataframe.sql._SQLColumnExpression object at 0x0000028E59C41690>}
8012
+            self._assign_call_udf(call_udf_expr)
8013
+ """
8014
+ df = self
8015
+ # Create a dictionary of output columns to column type (teradata type).
8016
+ returns = OrderedDict([(column.name, column.type) for column in df._metaexpr.c])
8017
+ # Create a dictionary of output columns to column type (python types).
8018
+ output_type_converters = {col_name: _Dtypes._teradata_type_to_python_type(col_type) \
8019
+ for col_name, col_type in returns.items()}
8020
+
8021
+ for colname, col in call_udf_expr.items():
8022
+ returns[colname] = col.type
8023
+ output_type_converters[colname] = _Dtypes._teradata_type_to_python_type(col.type)
8024
+ script_name = col._udf_script
8025
+ delimiter = col._delimiter
8026
+ quotechar = col._quotechar
8027
+
8028
+ # Create a dictionary of arguments to be passed to the script.
8029
+ script_data = {}
8030
+ script_data['input_cols'] = df.columns
8031
+ script_data['output_cols'] = list(returns.keys())
8032
+ script_data['output_type_converters'] = output_type_converters
8033
+ script_data['function_args'] = {colname: col._udf_args}
8034
+ script_data['delimiter'] = delimiter
8035
+ script_data['qoutechar'] = quotechar
8036
+
8037
+ # Convert the dictionary to a string.
8038
+ # The string is URL encoded to pass it as a parameter to the script.
8039
+ script_data = urllib.parse.quote_plus(json.dumps(script_data))
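A hedged sketch of the payload handling above, with illustrative keys only: the per-call metadata survives a JSON dump plus URL-encoding round trip and contains no spaces, so it can ride along as a single command-line argument to the script.

    import json
    import urllib.parse

    script_data = {"input_cols": ["accounts", "Feb"],
                   "output_cols": ["accounts", "Feb", "upper_col"],
                   "delimiter": ","}
    encoded = urllib.parse.quote_plus(json.dumps(script_data))

    assert " " not in encoded                                        # safe to embed in a command string
    assert json.loads(urllib.parse.unquote_plus(encoded)) == script_data   # round-trips losslessly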
8040
+
8041
+ if UtilFuncs._is_lake():
8042
+ from teradataml.table_operators.Apply import Apply
8043
+ apply_op_obj = Apply(data=df,
8044
+ script_name=script_name,
8045
+ env_name=col._env_name,
8046
+ returns = returns,
8047
+ delimiter = delimiter,
8048
+ quotechar=quotechar,
8049
+ files_local_path=GarbageCollector._get_temp_dir_name(),
8050
+ apply_command="python3 {} {}".format(script_name, script_data)
8051
+ )
8052
+ try:
8053
+ df = apply_op_obj.execute_script(
8054
+ output_style=OutputStyle.OUTPUT_TABLE.value)
8055
+ except Exception:
8056
+ raise
8057
+ else:
8058
+ import teradataml.context.context as context
8059
+ database = context._get_current_databasename()
8060
+
8061
+ check_reserved_keyword = False if sorted(list(returns.keys())) == sorted(df.columns) else True
8062
+
8063
+ from teradataml.table_operators.Script import Script
8064
+ table_op_obj = Script(data=df,
8065
+ script_name=script_name,
8066
+ files_local_path=GarbageCollector._get_temp_dir_name(),
8067
+ script_command="{}/bin/python3 ./{}/{} {}".format(
8068
+ configure.indb_install_location, database, script_name, script_data),
8069
+ returns=returns,
8070
+ quotechar=quotechar,
8071
+ delimiter = delimiter
8072
+ )
8073
+ table_op_obj.check_reserved_keyword = check_reserved_keyword
8074
+ try:
8075
+ df = table_op_obj.execute_script(
8076
+ output_style=OutputStyle.OUTPUT_TABLE.value)
8077
+ except Exception:
8078
+ raise
8079
+ return df
8080
+
7392
8081
  @collect_queryband(queryband="DF_assign")
7393
8082
  def assign(self, drop_columns=False, **kwargs):
7394
8083
  """
@@ -7420,7 +8109,7 @@ class DataFrame():
7420
8109
  * SQLAlchemy ClauseElements.
7421
8110
  (See teradataml extension with SQLAlchemy in teradataml User Guide
7422
8111
  and Function reference guide for more details)
7423
- * Function - udf.
8112
+ * Function - udf, call_udf.
7424
8113
 
7425
8114
 
7426
8115
  RETURNS:
@@ -7454,7 +8143,7 @@ class DataFrame():
7454
8143
  Look at Example 18 to understand more.
7455
8144
  8. While passing multiple udf expressions, one can not pass one column output
7456
8145
  as another column input in the same ``assign`` call.
7457
- 9. If user pass multiple udf expressions, delimiter and quotechar specified in
8146
+             9. If user passes multiple udf expressions, delimiter and quotechar specified in
7458
8147
  last udf expression are considered for processing.
7459
8148
 
7460
8149
  RAISES:
@@ -7819,13 +8508,13 @@ class DataFrame():
7819
8508
  Red Inc 200.0 150.0 140.0 NaN 17/01/04 201.0 abc RED INC 207
7820
8509
  >>>
7821
8510
 
7822
- # Example 19: Convert the values is 'accounts' column to upper case using a user
8511
+            # Example 19: Convert the values in 'accounts' column to upper case using a user
7823
8512
  # defined function on Vantage Cloud Lake.
7824
8513
  # Create a Python 3.10.5 environment with given name and description in Vantage.
7825
8514
  >>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
7826
8515
  User environment 'test_udf' created.
7827
8516
  >>>
7828
- # Create a user defined functions to 'to_upper' to get the values in upper case
8517
+            # Create a user defined function 'to_upper' to get the values in upper case
7829
8518
  # and pass the user env to run it on.
7830
8519
  >>> from teradataml.dataframe.functions import udf
7831
8520
  >>> @udf(env_name = env)
@@ -7837,7 +8526,31 @@ class DataFrame():
7837
8526
  # to the DataFrame.
7838
8527
  >>> df.assign(upper_stats = to_upper('accounts'))
7839
8528
  Feb Jan Mar Apr datetime upper_stats
7840
- accounts
8529
+ accounts
8530
+ Alpha Co 210.0 200.0 215.0 250.0 17/01/04 ALPHA CO
8531
+ Blue Inc 90.0 50.0 95.0 101.0 17/01/04 BLUE INC
8532
+ Yellow Inc 90.0 NaN NaN NaN 17/01/04 YELLOW INC
8533
+ Jones LLC 200.0 150.0 140.0 180.0 17/01/04 JONES LLC
8534
+ Orange Inc 210.0 NaN NaN 250.0 17/01/04 ORANGE INC
8535
+ Red Inc 200.0 150.0 140.0 NaN 17/01/04 RED INC
8536
+ >>>
8537
+
8538
+            # Example 20: Register and call the user defined function to get the values in upper case.
8539
+ >>> from teradataml.dataframe.functions import udf, register, call_udf
8540
+ >>> @udf
8541
+ ... def to_upper(s):
8542
+ ... if s is not None:
8543
+ ... return s.upper()
8544
+ >>>
8545
+ # Register the created user defined function with name "upper".
8546
+ >>> register("upper", to_upper)
8547
+ >>>
8548
+ # Call the user defined function registered with name "upper" and assign the
8549
+ # ColumnExpression returned to the DataFrame.
8550
+ >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
8551
+ >>> res
8552
+ Feb Jan Mar Apr datetime upper_col
8553
+ accounts
7841
8554
  Alpha Co 210.0 200.0 215.0 250.0 17/01/04 ALPHA CO
7842
8555
  Blue Inc 90.0 50.0 95.0 101.0 17/01/04 BLUE INC
7843
8556
  Yellow Inc 90.0 NaN NaN NaN 17/01/04 YELLOW INC
@@ -7894,10 +8607,14 @@ class DataFrame():
7894
8607
  # column name to normal/regular expressions.
7895
8608
  udf_expr = {}
7896
8609
  regular_expr = {}
8610
+ call_udf_expr = {}
7897
8611
  for colname, col in kwargs.items():
7898
8612
  # If value passed in kwargs is a ColumnExpression and is a udf, store it.
7899
8613
  if isinstance(col, ColumnExpression) and col._udf:
7900
8614
  udf_expr[colname] = col
8615
+            # If value passed in kwargs is a ColumnExpression and is a registered udf script, store it.
8616
+ elif isinstance(col, ColumnExpression) and col._udf_script:
8617
+ call_udf_expr[colname] = col
7901
8618
  else:
7902
8619
  regular_expr[colname] = col
7903
8620
  df = self
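A hedged standalone sketch of the kwargs routing above, using stand-in objects; the real check additionally requires each value to be a ColumnExpression before inspecting _udf and _udf_script.

    class _Expr:
        def __init__(self, udf=False, udf_script=None):
            self._udf, self._udf_script = udf, udf_script

    kwargs = {"a": _Expr(udf=True), "b": _Expr(udf_script="upper.py"), "c": _Expr()}

    udf_expr, call_udf_expr, regular_expr = {}, {}, {}
    for colname, col in kwargs.items():
        if col._udf:
            udf_expr[colname] = col
        elif col._udf_script:
            call_udf_expr[colname] = col
        else:
            regular_expr[colname] = col

    assert set(udf_expr) == {"a"} and set(call_udf_expr) == {"b"} and set(regular_expr) == {"c"}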
@@ -7917,6 +8634,9 @@ class DataFrame():
7917
8634
  if bool(udf_expr):
7918
8635
  df = df._assign_udf(udf_expr)
7919
8636
 
8637
+ if bool(call_udf_expr):
8638
+ df = df._assign_call_udf(call_udf_expr)
8639
+
7920
8640
  return df
7921
8641
 
7922
8642
 
@@ -8116,7 +8836,9 @@ class DataFrame():
8116
8836
  _Validators._validate_column_exists_in_dataframe(keys, self._metaexpr)
8117
8837
 
8118
8838
  try:
8119
- new_index_list = self._index_label if self._index_label is not None else []
8839
+
8840
+ # Slicing creates a new list instance with the same contents.
8841
+ new_index_list = self._index_label[:] if self._index_label is not None else []
8120
8842
 
8121
8843
            # Creating a list with requested index labels based on append
8122
8844
  if append:
@@ -8131,7 +8853,7 @@ class DataFrame():
8131
8853
  new_index_list = keys
8132
8854
 
8133
8855
  # Takes care of appending already existing index
8134
- new_index_list = list(set(new_index_list))
8856
+ new_index_list = list(dict.fromkeys(new_index_list))
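Both calls de-duplicate, but dict.fromkeys keeps the first-seen order of the index labels while set() does not, which is presumably the motivation for the change above; a quick standalone check:

    labels = ["id", "masters", "id", "gpa"]

    assert list(dict.fromkeys(labels)) == ["id", "masters", "gpa"]   # order preserved
    assert sorted(set(labels)) == ["gpa", "id", "masters"]           # a set forgets insertion order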
8135
8857
 
8136
8858
  # In case requested index is same as existing index, return same DF
8137
8859
  if new_index_list == self._index_label:
@@ -9014,15 +9736,15 @@ class DataFrame():
9014
9736
  TypeError, ValueError, TeradataMLException
9015
9737
 
9016
9738
  EXAMPLES:
9017
- >>> # Load the example datasets.
9018
- ... load_example_data("dataframe", ["ocean_buoys", "ocean_buoys_nonpti"])
9739
+ # Load the example datasets.
9740
+ >>> load_example_data("dataframe", ["ocean_buoys", "ocean_buoys_nonpti"])
9019
9741
  >>>
9020
9742
 
9021
- >>> # Create the required DataFrames.
9022
- ... # DataFrame on non-sequenced PTI table
9023
- ... ocean_buoys = DataFrame("ocean_buoys")
9024
- >>> # Check DataFrame columns and let's peek at the data
9025
- ... ocean_buoys.columns
9743
+ # Create the required DataFrames.
9744
+ # DataFrame on non-sequenced PTI table
9745
+ >>> ocean_buoys = DataFrame("ocean_buoys")
9746
+ # Check DataFrame columns and let's peek at the data
9747
+ >>> ocean_buoys.columns
9026
9748
  ['buoyid', 'TD_TIMECODE', 'temperature', 'salinity']
9027
9749
  >>> ocean_buoys.head()
9028
9750
  TD_TIMECODE temperature salinity
@@ -9038,10 +9760,10 @@ class DataFrame():
9038
9760
  0 2014-01-06 08:00:00.000000 10.0 55
9039
9761
  0 2014-01-06 08:10:00.000000 10.0 55
9040
9762
 
9041
- >>> # DataFrame on NON-PTI table
9042
- ... ocean_buoys_nonpti = DataFrame("ocean_buoys_nonpti")
9043
- >>> # Check DataFrame columns and let's peek at the data
9044
- ... ocean_buoys_nonpti.columns
9763
+ # DataFrame on NON-PTI table
9764
+ >>> ocean_buoys_nonpti = DataFrame("ocean_buoys_nonpti")
9765
+ # Check DataFrame columns and let's peek at the data
9766
+ >>> ocean_buoys_nonpti.columns
9045
9767
  ['buoyid', 'timecode', 'temperature', 'salinity']
9046
9768
  >>> ocean_buoys_nonpti.head()
9047
9769
  buoyid temperature salinity
@@ -9553,6 +10275,12 @@ class DataFrame():
9553
10275
  # Validate argument types
9554
10276
  _Validators._validate_function_arguments(awu_matrix)
9555
10277
 
10278
+ # If self and right DataFrames are pointing to same Table object,
10279
+ # raise error.
10280
+ if self._metaexpr.t is right._metaexpr.t:
10281
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_ALIAS_REQUIRED, "merge"),
10282
+ MessageCodes.TDMLDF_ALIAS_REQUIRED)
10283
+
9556
10284
  if (right_on is not None and left_on is None) or (right_on is None and left_on is not None):
9557
10285
  raise TeradataMlException(
9558
10286
  Messages.get_message(MessageCodes.MUST_PASS_ARGUMENT, "left_on", "right_on"),
@@ -9609,6 +10337,15 @@ class DataFrame():
9609
10337
  # If user did not pass any arguments which form join conditions,
9610
10338
  # Merge is performed using index columns of TeradataML DataFrames
9611
10339
  if on is None and left_on is None and right_on is None and not use_index:
10340
+            # DataFrames created on OTF table(s) do not have an index.
10341
+ if self._datalake is not None or right._datalake is not None:
10342
+ msg_code = MessageCodes.EXECUTION_FAILED
10343
+ emsg = "Either 'on' argument or both 'left_on' and 'right_on' arguments" \
10344
+ " must be provided to merge DataFrames when they are created on" \
10345
+ " OTF table(s)."
10346
+ error_msg = Messages.get_message(msg_code, "merge dataframes", emsg)
10347
+ raise TeradataMlException(error_msg, msg_code)
10348
+
9612
10349
  if self._index_label is None or right._index_label is None:
9613
10350
  raise TeradataMlException(
9614
10351
  Messages.get_message(MessageCodes.TDMLDF_INDEXES_ARE_NONE), MessageCodes.TDMLDF_INDEXES_ARE_NONE)
@@ -9616,6 +10353,12 @@ class DataFrame():
9616
10353
  use_index = True
9617
10354
 
9618
10355
  if use_index:
10356
+ if self._datalake is not None or right._datalake is not None:
10357
+ msg_code = MessageCodes.EXECUTION_FAILED
10358
+ emsg = "Can not use Index to merge DataFrames when they are created on OTF table(s)."
10359
+ error_msg = Messages.get_message(msg_code, "merge dataframes", emsg)
10360
+ raise TeradataMlException(error_msg, msg_code)
10361
+
9619
10362
  if self._index_label is None or right._index_label is None:
9620
10363
  raise TeradataMlException(
9621
10364
  Messages.get_message(MessageCodes.TDMLDF_INDEXES_ARE_NONE), MessageCodes.TDMLDF_INDEXES_ARE_NONE)
@@ -10271,7 +11014,7 @@ class DataFrame():
10271
11014
  2. seed is supported for stratify column.
10272
11015
  3. Arguments "stratify_column", "seed", "id_column" are supported only
10273
11016
  for stratifying the data.
10274
- Types: str
11017
+ Types: str OR Feature
10275
11018
 
10276
11019
  seed:
10277
11020
  Optional Argument.
@@ -10297,7 +11040,7 @@ class DataFrame():
10297
11040
  for stratifying the data.
10298
11041
  2. "id_column" is supported only when "stratify_column" is used.
10299
11042
  Ignored otherwise.
10300
- Types: str
11043
+ Types: str OR Feature
10301
11044
 
10302
11045
  RETURNS:
10303
11046
  teradataml DataFrame
@@ -12332,6 +13075,9 @@ class DataFrame():
12332
13075
  False)
12333
13076
  column_names = list(dict.fromkeys(column_names))
12334
13077
 
13078
+ if list_td_reserved_keywords(column_names) or UtilFuncs._is_ascii(column_names):
13079
+ column_names = UtilFuncs._teradata_quote_arg(column_names, "\"", False)
13080
+
12335
13081
  col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, column_names)
12336
13082
  sel_nodeid = self._aed_utils._aed_select(self._nodeid, ','.join(column_names), True)
12337
13083
  new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items())
@@ -14249,7 +14995,18 @@ class DataFrame():
14249
14995
  >>> plot.show()
14250
14996
 
14251
14997
  """
14252
- return _Plot(x=x, y=y, scale=scale, kind=kind, **kwargs)
14998
+
14999
+ _plot = _Plot(x=x, y=y, scale=scale, kind=kind, **kwargs)
15000
+        # Cache the plot: reuse the previously generated plot when the new request is equal to it.
15001
+ if self._plot is None:
15002
+ self._plot = _plot
15003
+ return _plot
15004
+
15005
+ if self._plot == _plot:
15006
+ return self._plot
15007
+ else:
15008
+ self._plot = _plot
15009
+ return _plot
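A hedged standalone sketch of the caching behaviour introduced above, with a stand-in Plot class: an equal request returns the cached object, a different one rebuilds and replaces it.

    class Plot:
        def __init__(self, kind):
            self.kind = kind
        def __eq__(self, other):
            return isinstance(other, Plot) and self.kind == other.kind

    class PlotCache:
        def __init__(self):
            self._plot = None
        def plot(self, kind):
            new = Plot(kind)
            if self._plot is None or self._plot != new:
                self._plot = new
            return self._plot

    cache = PlotCache()
    first = cache.plot("line")
    assert cache.plot("line") is first      # equal request: cached object reused
    assert cache.plot("bar") is not first   # different request: new plot cached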
14253
15010
 
14254
15011
  @collect_queryband(queryband="DF_itertuples")
14255
15012
  def itertuples(self, name='Row', num_rows=None):
@@ -17142,11 +17899,18 @@ class _TDUAF(DataFrame):
17142
17899
  table_name = self._db_utils._execute_node_return_db_object_name(self._data._nodeid, self._data._metaexpr)
17143
17900
 
17144
17901
  # UAF Functions do not accept double quotes.
17902
+ tdp = preparer(td_dialect)
17145
17903
  db_name = UtilFuncs._extract_db_name(table_name)
17146
- if db_name:
17147
- table_name = '"{}"."{}"'.format(db_name, UtilFuncs._extract_table_name(table_name))
17904
+ datalake_name = UtilFuncs._extract_datalake_name(table_name)
17905
+ if datalake_name:
17906
+ table_name = '{}.{}.{}'.format(tdp.quote(datalake_name),
17907
+ tdp.quote(db_name),
17908
+ tdp.quote(UtilFuncs._extract_table_name(table_name)))
17909
+ elif db_name:
17910
+ table_name = '{}.{}'.format(tdp.quote(db_name),
17911
+ tdp.quote(UtilFuncs._extract_table_name(table_name)))
17148
17912
  else:
17149
- table_name = UtilFuncs._extract_table_name(table_name)
17913
+ table_name = tdp.quote(UtilFuncs._extract_table_name(table_name))
17150
17914
 
17151
17915
  sql_clauses.append("TABLE_NAME ({})")
17152
17916
  sql_values.append(table_name)
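A hedged sketch of the three-part (datalake) versus two-part name assembly above. The quote callable is a stand-in for the dialect preparer, which only adds double quotes when the identifier actually needs them; the identity default keeps the example self-contained.

    def qualified_name(table, db=None, datalake=None, quote=lambda name: name):
        if datalake:
            return "{}.{}.{}".format(quote(datalake), quote(db), quote(table))
        if db:
            return "{}.{}".format(quote(db), quote(table))
        return quote(table)

    assert qualified_name("t1", db="db1", datalake="lake1") == "lake1.db1.t1"
    assert qualified_name("t1", db="db1") == "db1.t1"
    assert qualified_name("t1") == "t1"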