teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +71 -0
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +51 -24
- teradataml/analytics/json_parser/utils.py +11 -17
- teradataml/automl/__init__.py +103 -48
- teradataml/automl/data_preparation.py +55 -37
- teradataml/automl/data_transformation.py +131 -69
- teradataml/automl/feature_engineering.py +117 -185
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +13 -25
- teradataml/automl/model_training.py +214 -75
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +11 -6
- teradataml/common/garbagecollector.py +5 -0
- teradataml/common/messagecodes.py +3 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/utils.py +6 -0
- teradataml/context/context.py +49 -29
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/glm_example.json +28 -1
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +20 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
- teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
- teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
- teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
- teradataml/data/teradataml_example.json +77 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +120 -61
- teradataml/dataframe/dataframe.py +102 -17
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +272 -89
- teradataml/dataframe/sql.py +84 -0
- teradataml/dbutils/dbutils.py +2 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
- teradataml/options/__init__.py +13 -4
- teradataml/options/configure.py +27 -6
- teradataml/scriptmgmt/UserEnv.py +19 -16
- teradataml/scriptmgmt/lls_utils.py +117 -14
- teradataml/table_operators/Script.py +2 -3
- teradataml/table_operators/TableOperator.py +58 -10
- teradataml/utils/validators.py +40 -2
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
@@ -19,11 +19,11 @@ from teradataml.common.sqlbundle import SQLBundle
 from teradataml.common.utils import UtilFuncs
 from teradataml.common.constants import CopyToConstants
 from teradataml.context.context import get_context, get_connection, \
+    _get_context_temp_databasename, _get_current_databasename
 from teradataml.dataframe import dataframe as tdmldf
 from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
+    _get_pd_df_column_names, _extract_column_info, \
+    _check_columns_insertion_compatible, _get_index_labels
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
 from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
 from teradataml.utils.validators import _Validators
@@ -335,7 +335,7 @@ def fastexport(df, export_to="pandas", index_column=None,
         if not csv_file:
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "csv_file",
-                                     "{0}='{1}'".format("export_to","csv")),
+                                     "{0}='{1}'".format("export_to", "csv")),
                 MessageCodes.DEPENDENT_ARG_MISSING)

         if not csv_file.lower().endswith(".csv"):
@@ -363,7 +363,7 @@ def fastexport(df, export_to="pandas", index_column=None,
         raise TeradataMlException(
             Messages.get_message(MessageCodes.DATA_EXPORT_FAILED, "fastexport",
                                  export_to, str(err)),
+            MessageCodes.DATA_EXPORT_FAILED)


 @collect_queryband(queryband="rdCsv")
@@ -601,7 +601,7 @@ def read_csv(filepath,
         Specifies whether to persist the errors/warnings(if any) information in Vantage
         or not.
         If "save_errors" is set to False:
-            1. Errors or warnings (
+            1. Errors or warnings (if any) are not persisted into tables.
             2. Errors table genarated by FastloadCSV are not persisted.
         If "save_errors" is set to True:
             1. The errors or warnings information is persisted and names of error and
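For context, a minimal sketch of how these flags are typically exercised from the public read_csv() entry point; the file, table, and column names are made up, and the argument list beyond filepath, table_name, types, save_errors, and catch_errors_warnings is assumed rather than taken from this diff:

    from collections import OrderedDict
    from teradataml import create_context, read_csv
    from teradatasqlalchemy.types import INTEGER, VARCHAR

    create_context(host="<host>", username="<user>", password="<pwd>")

    # Column types for the target table (names and types are illustrative).
    types = OrderedDict(id=INTEGER, fname=VARCHAR(20))

    # With save_errors=True, FastloadCSV error/warning information is
    # persisted to tables in Vantage; with catch_errors_warnings=True it
    # is also surfaced to the client alongside the result.
    result = read_csv(filepath="sample.csv",
                      table_name="sample_tbl",
                      types=types,
                      save_errors=True,
                      catch_errors_warnings=True)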
@@ -883,7 +883,7 @@ def read_csv(filepath,
     # Create SQLAlchemy table object from existing table.
     existing_table = UtilFuncs._get_sqlalchemy_table(table_name,
+                                                     schema_name=schema_name)

     # Check compatibility of CSV columns with existing table columns.
     if types is not None:
@@ -904,7 +904,7 @@ def read_csv(filepath,
                                     primary_index=existing_table_primary_index)
     else:
         rc_dict = dt_obj._insert_from_csv_without_fastload(table_name=table_name,
+                                                           column_names=cols_name)
     # Return the read_csv result.
     return dt_obj._get_result(rc_dict)

@@ -923,6 +923,7 @@ class _DataTransferUtils():
     Teradata Vantage to outside world, for example Data Transfer using
     FastExport Protocol.
     """
+
     def __init__(self, df, index_column=None, num_rows=99999, all_rows=False,
                  catch_errors_warnings=False, table_name=None,
                  schema_name=None, if_exists='append', index=False,
@@ -934,7 +935,9 @@ class _DataTransferUtils():
                  columns_list=None, sequence_column=None, seq_max=None,
                  use_fastload=True, api_name='fastexport',
                  open_sessions=None, chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
-                 match_column_order=True
+                 match_column_order=True, err_tbl_1_suffix=None,
+                 err_tbl_2_suffix=None, err_tbl_name=None, warn_tbl_name=None,
+                 err_staging_db=None):
         """
         DESCRIPTION:
             Constructor for the _DataTransferUtils class. It initialises
@@ -1088,6 +1091,35 @@ class _DataTransferUtils():
                 Default Value: 16383
                 Types: int

+            err_tbl_1_suffix:
+                Optional Argument.
+                Specifies the suffix for error table 1 created by fastload job.
+                Types: String
+
+            err_tbl_2_suffix:
+                Optional Argument.
+                Specifies the suffix for error table 2 created by fastload job.
+                Types: String
+
+            err_tbl_name:
+                Optional Argument.
+                Specifies the name for error table.
+                Types: String
+
+            warn_tbl_name:
+                Optional Argument.
+                Specifies the name for warning table.
+                Types: String
+
+            err_staging_db:
+                Optional Argument.
+                Specifies the name of the database to be used for creating staging
+                table and error tables.
+                Note:
+                    Current session user must have CREATE, DELETE and INSERT table
+                    rights on err_staging_db database.
+                Types: String
+
         PARAMETERS:
             None.
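These constructor arguments back the corresponding user-facing fastload() options in this release. A hedged sketch of how a caller might use them; that fastload() forwards these exact keyword names is inferred from the docstring above, and the table and database names are assumptions:

    import pandas as pd
    from teradataml.dataframe.fastload import fastload

    # Illustrative frame; "sales_stage" and "etl_scratch" are made-up names.
    pdf = pd.DataFrame({"id": [1, 2, 3], "amount": [10.5, 20.0, 7.25]})

    result = fastload(df=pdf,
                      table_name="sales_stage",
                      save_errors=True,
                      err_staging_db="etl_scratch",  # needs CREATE/DELETE/INSERT rights
                      err_tbl_1_suffix="_e1",        # suffix for FastLoad error table 1
                      err_tbl_2_suffix="_e2")        # suffix for FastLoad error table 2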
@@ -1139,6 +1171,11 @@ class _DataTransferUtils():
         self.open_sessions = open_sessions
         self.chunksize = chunksize
         self.match_column_order = match_column_order
+        self.err_tbl_1_suffix = err_tbl_1_suffix
+        self.err_tbl_2_suffix = err_tbl_2_suffix
+        self.err_tbl_name = err_tbl_name
+        self.warn_tbl_name = warn_tbl_name
+        self.err_staging_db = err_staging_db

         # Validate arguments.
         if self.api_name == 'fastexport':
@@ -1243,6 +1280,7 @@ class _DataTransferUtils():
             dt_obj = _DataTransferUtils()
             dt_obj._validate_csv_sep_quotechar()
         """
+
         # Function to validate char value for length and allowed characters.
         def validate_char_arg_csv(arg_name, arg):
@@ -1444,7 +1482,7 @@ class _DataTransferUtils():
                                csv_file_name=target_csv)

         """
-        fastexport_esc_func=""
+        fastexport_esc_func = ""
         open_session_esc_func = ""
         if require is not None:
             if require:
@@ -1492,7 +1530,7 @@ class _DataTransferUtils():
         write_csv_escape_func = DriverEscapeFunctions.WRITE_TO_CSV.value.format(csv_file_name)
         field_sep_esc_func = DriverEscapeFunctions.FIELD_SEP.value.format(field_sep)
         field_quote_esc_func = DriverEscapeFunctions.FIELD_QUOTE.value.format(field_quote)
+
         query = "{0}{1}{2}{3}{4}{5}".format(fastexport_esc_func,
                                             open_session_esc_func,
                                             field_sep_esc_func,
@@ -1639,7 +1677,7 @@ class _DataTransferUtils():

     # End of functions specific to exporting table data in Vantage into pandas DataFrame.

+    # General functions to get warrnings and errors.
     def _get_errors_warnings(self, cur, insert_stmt, escape_function):
         """
         Internal function executes teradatasql provided escape functions
@@ -1672,7 +1710,7 @@ class _DataTransferUtils():
             dt_obj._get_errors_warnings(cur, insert_stmt, escape_function)
         """
         errorwarninglist = self._process_escape_functions(cur,
-                                                          escape_function=
+                                                          escape_function=escape_function,
                                                           insert_query=insert_stmt)

         from teradatasql import vernumber
@@ -1714,11 +1752,10 @@ class _DataTransferUtils():

         return pd.DataFrame()

-    def _create_error_warnings_table(self, pdf, msg_type, logon_seq_number):
+    def _create_error_warnings_table(self, pdf, msg_type, logon_seq_number, table_name=None):
         """
         DESCRIPTION:
-            Internal function creates the errors and warnings table in Vantage
-            if save_errors is set to True.
+            Internal function creates the errors and warnings table in Vantage.

         PARAMETERS:
             pdf:
@@ -1747,12 +1784,13 @@ class _DataTransferUtils():
             dt_obj = _DataTransferUtils(df, table_name, types)
             dt_obj._create_error_warnings_table(pdf, msg_type, logon_seq_number)
         """
-        if
+        if not table_name:
+            table_name = "td_fl_{0}_{1}_{2}".format(self.table_name, msg_type, logon_seq_number)
+        copy_to_sql(pdf, table_name, schema_name=self.err_staging_db,
+                    if_exists='replace')
+        return "{}.{}".format(self.err_staging_db if self.err_staging_db
+                              else _get_current_databasename(),
+                              table_name)

     def _process_escape_functions(self, cur, escape_function, insert_query=None):
         """
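In other words, when no explicit name is supplied, the persisted table is named from the target table, the message type, and the FastLoad logon sequence number, qualified by err_staging_db when set and by the current database otherwise. A quick illustration of the resulting name; all values here are made up:

    # Default naming: td_fl_<target table>_<err|warn>_<logon sequence number>.
    table_name = "td_fl_{0}_{1}_{2}".format("sales_stage", "err", 1042)
    print("{}.{}".format("etl_scratch", table_name))
    # -> etl_scratch.td_fl_sales_stage_err_1042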
@@ -1834,16 +1872,23 @@ class _DataTransferUtils():
             ins_query = dt_obj._form_insert_query()
         """

-        open_sessions_esc_func = ""
-        field_sep_esc_func = ""
-        field_quote_esc_func = ""
+        escape_funcs = ""

+        # Get the fastload escape function.
+        if self.use_fastload:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.REQUIRE_FASTLOAD.value
+
+        # Get the escape function clause for open_sessions.
+        if self.open_sessions is not None:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.OPEN_SESSIONS.value.format(self.open_sessions)
+
+        # Create the list of values to be inserted.
         if self.api_name == "fastload":
             col_names = _get_pd_df_column_names(self.df)
             insert_values = ", ".join(['?' for i in range(len(col_names) + len(self.df.index.names)
                                        if self.index is True else len(col_names))])

+        # Get escape functions related to read_csv.
         if self.api_name == "read_csv":
             # Get the column names.
             if self.if_exists == 'append' and column_names is not None:
@@ -1852,7 +1897,7 @@ class _DataTransferUtils():
                 col_names, _ = _extract_column_info(self.df, self.types)

             # Get read_csv escape function.
+            escape_funcs = escape_funcs + DriverEscapeFunctions.READ_CSV.value.format(self.df)
             insert_values = ", ".join(['?' for i in range(len(col_names))])

             # Create escape function for sep.
@@ -1861,7 +1906,7 @@ class _DataTransferUtils():
                 field_sep = "''''"
             elif self.sep == "\"":
                 field_sep = "\"\"\"\""
+            escape_funcs = escape_funcs + DriverEscapeFunctions.FIELD_SEP.value.format(field_sep)

             # Create escape function for quotechar.
             field_quote = "'{0}'".format(self.quotechar)
@@ -1869,27 +1914,28 @@ class _DataTransferUtils():
                 field_quote = "''''"
             elif self.quotechar == "\"":
                 field_quote = "\"\"\"\""
+            escape_funcs = escape_funcs + DriverEscapeFunctions.FIELD_QUOTE.value.format(field_quote)

         # Create base insert query.
         base_insert_query = "INSERT INTO {0} VALUES ({1});".format(table, insert_values)

-        # Get the
+        # Get the escape function clauses for error table and DB related escape functions.
+        # TODO: This condition will be optimized with ELE-6743.
+        if self.api_name == "fastload" and self.save_errors and not self.err_tbl_name:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_MNG_FLAG.value.format("off")

-        open_sessions_esc_func = DriverEscapeFunctions.OPEN_SESSIONS.value.format(self.open_sessions)
+        if self.err_tbl_1_suffix:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_1.value.format(self.err_tbl_1_suffix)

+        if self.err_tbl_2_suffix:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_TBL_2.value.format(self.err_tbl_2_suffix)

+        if self.err_staging_db:
+            escape_funcs = escape_funcs + DriverEscapeFunctions.ERR_STAGING_DB.value.format(self.err_staging_db)

-                                             base_insert_query)
-        return query
+        # Generate final insert query by appending all escape functions.
+        query = "{0}{1}".format(escape_funcs, base_insert_query)
+        return query

     def _table_exists(self, con):
         """
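The DriverEscapeFunctions values are teradatasql driver escape sequences that get prepended to the bare INSERT. A minimal sketch of what the assembled query text looks like, assuming the driver's {fn teradata_...} escape-function syntax; the mapping of enum members to these particular function names, and all suffix/database values, are assumptions for illustration:

    # Escape-function clauses accumulate in front of the INSERT; the
    # teradatasql driver consumes them to configure the FastLoad job.
    escape_funcs = (
        "{fn teradata_require_fastload}"                    # REQUIRE_FASTLOAD
        "{fn teradata_error_table_1_suffix(_e1)}"           # ERR_TBL_1, assumed suffix
        "{fn teradata_error_table_2_suffix(_e2)}"           # ERR_TBL_2, assumed suffix
        "{fn teradata_error_table_database(etl_scratch)}"   # ERR_STAGING_DB, assumed DB
    )
    base_insert_query = 'INSERT INTO "etl_scratch"."sales_stage" VALUES (?, ?);'
    query = "{0}{1}".format(escape_funcs, base_insert_query)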
@@ -1914,8 +1960,7 @@ class _DataTransferUtils():
         """
         return con.dialect.has_table(get_connection(), self.table_name, self.schema_name)

-    def _get_fully_qualified_table_name(self, table_name=None):
+    def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
         """
         DESCRIPTION:
             Function returns schema qualified table name
@@ -1929,6 +1974,11 @@ class _DataTransferUtils():
                 Specifies the table name.
                 Types: str

+            schema_name:
+                Optional Argument.
+                Specifies the schema name.
+                Types: str
+
         RETURNS:
             str.

@@ -1942,12 +1992,14 @@ class _DataTransferUtils():
         table_name = table_name if table_name else self.table_name

         table = '"{}"'.format(table_name)
-        if
+        if schema_name is not None:
+            table = '"{}"."{}"'.format(schema_name, table_name)
+        elif self.schema_name is not None:
             table = '"{}"."{}"'.format(self.schema_name, table_name)

         return table

-    def _create_table(self, con, table_name=None):
+    def _create_table(self, con, table_name=None, schema_name=None):
         """
         DESCRIPTION:
             Internal function creates table in the Vantage.
@@ -1963,6 +2015,11 @@ class _DataTransferUtils():
                 Specifies the table name.
                 Types: str

+            schema_name:
+                Optional Argument.
+                Specifies the schema name where table needs to be created.
+                Types: str
+
         RETURNS:
             None.

@@ -1974,9 +2031,9 @@ class _DataTransferUtils():
             dt_obj._create_table(con)
         """
         table_name = table_name if table_name else self.table_name
+        schema_name = schema_name if schema_name else self.schema_name
         table = _create_table_object(df=self.df, table_name=table_name, types=self.types, con=con,
-                                     schema_name=
+                                     schema_name=schema_name, primary_index=self.primary_index,
                                      temporary=self.temporary, set_table=self.set_table, index=self.index,
                                      index_label=self.index_label)
@@ -2032,7 +2089,7 @@ class _DataTransferUtils():

         # Turn off autocommit before the Fastload insertion.
         self._process_escape_functions(cur, escape_function= \
+                                       DriverEscapeFunctions.AUTOCOMMIT_OFF)

         # Initialize dict template for saving error/warning information.
         err_dict = {}
@@ -2052,7 +2109,7 @@ class _DataTransferUtils():

         # Get logon sequence number to be used for error/warning table names
         logon_seq_number = self._process_escape_functions(cur, escape_function= \
+                                                          DriverEscapeFunctions.LOGON_SEQ_NUM,
                                                           insert_query=ins)

         # Commit the rows
@@ -2064,16 +2121,16 @@ class _DataTransferUtils():
         if len(warn) != 0:
             warn_dict['error_message'].extend(warn)

-        # Get error and warning
+        # Get error and warning information for error and warning tables, persist
         # error and warning tables to Vantage if user has specified save_error as True
         # else show it as pandas dataframe on console.
         pd_err_df = self._get_pandas_df_from_errors_warnings(err_dict)
-        if not pd_err_df.empty:
+        if not pd_err_df.empty and self.save_errors:
             msg_type = "err"
             error_tablename = self._create_error_warnings_table(pd_err_df, msg_type, logon_seq_number[0][0])

         pd_warn_df = self._get_pandas_df_from_errors_warnings(warn_dict)
-        if not pd_warn_df.empty:
+        if not pd_warn_df.empty and self.save_errors:
             msg_type = "warn"
             warn_tablename = self._create_error_warnings_table(pd_warn_df, msg_type, logon_seq_number[0][0])
@@ -2100,7 +2157,7 @@ class _DataTransferUtils():
         finally:
             # Turn on autocommit.
             self._process_escape_functions(cur, escape_function= \
+                                           DriverEscapeFunctions.AUTOCOMMIT_ON)
             cur.close()

     def _get_result(self, result_dict=None):
@@ -2262,7 +2319,7 @@ class _DataTransferUtils():
         awu_matrix.append(['quotechar', self.quotechar, True, (str)])
         awu_matrix.append(['catch_errors_warnings', self.catch_errors_warnings, False, (bool)])
         awu_matrix.append(['use_fastload', self.use_fastload, False, (bool)])
-        awu_matrix.append(['open_sessions',self.open_sessions, True, (int), False])
+        awu_matrix.append(['open_sessions', self.open_sessions, True, (int), False])
         awu_matrix.append(['chunksize', self.chunksize, False, (int)])
         awu_matrix.append(['match_column_order', self.match_column_order, True, (bool)])
         if isinstance(self.df, pd.DataFrame):
@@ -2307,8 +2364,8 @@ class _DataTransferUtils():

         if (is_multi_index and ((isinstance(self.index_label, str) and index_levels != 1) or
             (is_index_list and index_levels != len(self.index_label)))) or \
+           (not is_multi_index and is_index_list and
+            (is_index_list and num_index > 1)):
             valid_arg_msg = 'String or list of Strings with the number of ' \
                             'Strings matching the number of levels' \
                             ' in the index'
@@ -2522,16 +2579,18 @@ class _DataTransferUtils():

             # Load the data from CSV to staging table.
             rc_dict = self._insert_from_csv_with_fastload(table_name=stag_table_name,
+                                                          column_names=column_names)

             # Insert all rows from staging table to already existing table.
             df_utils._insert_all_from_table(self.table_name,
+                                            stag_table_name,
                                             column_names,
-                                            self.schema_name
+                                            to_schema_name=self.schema_name,
+                                            from_schema_name=self.schema_name)

             return rc_dict
         finally:
+            # Drop the staging table.
             if stage_table_created:
                 UtilFuncs._drop_table(self._get_fully_qualified_table_name(stag_table_name))
@@ -2733,7 +2792,7 @@ class _DataTransferUtils():
         # Get open_sessions argument.
         open_sessions = kwargs.pop("open_sessions", None)
         if not require_fastexport and open_sessions is not None:
-            raise TeradataMlException("'{0}' can only be used when '{1}' is set to True."\
+            raise TeradataMlException("'{0}' can only be used when '{1}' is set to True." \
                                       .format("open_sessions", "fastexport or require"),
                                       MessageCodes.DEPENDENT_ARGUMENT)
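So open_sessions is only meaningful when the FastExport protocol is actually in use. A hedged usage sketch, assuming the public fastexport() entry point forwards open_sessions via kwargs as this hunk shows; the table name is made up:

    from teradataml import DataFrame, fastexport

    df = DataFrame("sales")  # assumed table name

    # Valid: FastExport with a bounded number of data transfer sessions.
    pdf = fastexport(df, export_to="pandas", open_sessions=4)

    # Passing open_sessions on a path that does not require FastExport
    # raises TeradataMlException with MessageCodes.DEPENDENT_ARGUMENT.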
@@ -24,6 +24,7 @@ import teradataml.context.context as tdmlctx
 from collections import OrderedDict, namedtuple
 from sqlalchemy.sql import ClauseElement
 from teradataml import execute_sql
+from teradataml import GarbageCollector
 from teradataml.dataframe.sql import _MetaExpression
 from teradataml.dataframe.sql_interfaces import ColumnExpression
 from teradataml.dataframe.sql_functions import case
@@ -5017,7 +5018,7 @@ class DataFrame():
                     'median', 'var'

                 Acceptable formats for function(s) are
-                string, dictionary
+                string, dictionary, list of strings/functions/ColumnExpression or ColumnExpression.

                 Accepted combinations are:
                 1. String function name
@@ -5025,12 +5026,57 @@ class DataFrame():
                 3. Dictionary containing column name as key and
                    aggregate function name (string or list of
                    strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.
+
+                Note:
+                    * The name of the output columns are generated based on aggregate functions and column names.
+                      For Example,
+                      1. "func" passed as a string.
+                         >>> df.agg('mean')
+                         Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
+                         After the above operation, the output column names are:
+                         mean_employee_no, mean_marks, mean_dob, mean_joined_date
+
+                      2. "func" passed as a list of string functions.
+                         >>> df.agg(['min', 'sum'])
+                         Assume that the column names of the dataframe are employee_no, first_name, marks, dob, joined_date.
+                         After the above operation, the output column names are:
+                         min_employee_no, sum_employee_no, min_first_name, min_marks, sum_marks, min_dob, min_joined_date
+
+                      3. "func" passed as a dictionary containing column name as key and aggregate function name as value.
+                         >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min']})
+                         Output column names after the above operation are:
+                         min_employee_no, sum_employee_no, var_employee_no, min_first_name
+
+                      4. "func" passed as a ColumnExpression built using the aggregate functions.
+                         >>> df.agg(df.first_name.count())
+                         Output column name after the above operation is:
+                         count(first_name)
+
+                      5. "func" passed as a list of ColumnExpression built using the aggregate functions.
+                         >>> df.agg([df.employee_no.min(), df.first_name.count()])
+                         Output column names after the above operation are:
+                         min(employee_no), count(first_name)
+
+                    * On ColumnExpression or list of ColumnExpression alias() can be used to
+                      return the output columns with aliased name.
+                      For Example,
+                      >>> df.agg(df.first_name.count().alias("total_names"))
+                      Output column name after the above operation is:
+                      total_names
+
+                      >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
+                      Output column names after the above operation are:
+                      min_date, total_names

             RETURNS:
                 teradataml DataFrame object with operations
                 mentioned in parameter 'func' performed on specified
                 columns.
+
             RAISES:
                 TeradataMLException
                 1. TDMLDF_AGGREGATE_FAILED - If operations on given columns
@@ -5072,8 +5118,8 @@ class DataFrame():
                 valid datatype.

                 Possible error message:
-                Invalid type(s) passed to argument 'func', should be:
+                Invalid type(s) passed to argument 'func', should be:
+                ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression'].

             EXAMPLES :
                 # Load the data to run the example.
@@ -5090,21 +5136,49 @@ class DataFrame():
                 112    None   None  None   18/12/05
                 >>>

-                #
+                # Get the minimum, sum and variance of employee number and minimum and mean of name,
+                # by passing dictionary of column names to string function/list of string functions as parameter.
                 >>> df.agg({'employee_no' : ['min', 'sum', 'var'], 'first_name' : ['min', 'mean']})
+                   min_employee_no  sum_employee_no  var_employee_no min_first_name
+                0              100              313        44.333333           abcd

-                #
+                # Get the minimum and sum of all the columns in the dataframe,
+                # by passing list of string functions as parameter.
                 >>> df.agg(['min', 'sum'])
+                   min_employee_no  sum_employee_no min_first_name min_marks sum_marks min_dob min_joined_date
+                0              100              313           abcd      None      None    None      1902-05-12

-                #
+                # Get the mean of all the columns in the dataframe, by passing string function as parameter.
                 >>> df.agg('mean')
                    mean_employee_no mean_marks mean_dob mean_joined_date
                 0        104.333333       None     None         60/12/04

+                # Get the total names in the dataframe, by running count() on the "first_name"
+                # and passing ColumnExpression as parameter.
+                >>> df.agg(df.first_name.count())
+                   count(first_name)
+                0                  2
+
+                # Get the minimum of joining date and total of names in the dataframe,
+                # by running min() on joined_date and count() on the "first_name"
+                # and passing list of ColumnExpression as parameter.
+                >>> df.agg([df.employee_no.min(), df.first_name.count()])
+                   min(employee_no)  count(first_name)
+                0               100                  2
+
+                # Get the total names in the dataframe, by running count() on the "first_name" and
+                # use alias() to have the output column named as "total_names".
+                >>> df.agg(df.first_name.count().alias("total_names"))
+                   total_names
+                0            2
+
+                # Get the minimum of joining date and total names in the dataframe,
+                # by running min() on joined_date and count() on the "first_name" and
+                # use alias() to have the output column named as "min_date" and "total_names".
+                >>> df.agg([df.joined_date.min().alias("min_date"), df.first_name.count().alias("total_names")])
+                   min_date  total_names
+                0  02/12/05            2

                 # Select only subset of columns from the DataFrame.
                 >>> df1 = df.select(['employee_no', 'first_name', 'joined_date'])
@@ -5145,9 +5219,9 @@ class DataFrame():
            raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, "func"),
                                      MessageCodes.MISSING_ARGS)

-        if not isinstance(func, str
+        if not isinstance(func, (str, list, dict, ColumnExpression)):
            raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
+                                     'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
                                      MessageCodes.UNSUPPORTED_DATATYPE)

        return self._get_dataframe_aggregate(func)
@@ -5169,6 +5243,8 @@ class DataFrame():
                 3. Dictionary containing column name as key and
                    aggregate function name (string or list of
                    strings) as value
+                4. ColumnExpression built using the aggregate functions.
+                5. List of ColumnExpression built using the aggregate functions.

             **kwargs: Keyword arguments. Mainly used for Time Series Aggragates.
@@ -10013,9 +10089,10 @@ class DataFrame():
        case_when_then = {}
        list_of_fracs = frac

-        # When stratify column is passed for sample
-        #
+        # When stratify column is passed for sample or when seed is passed for
+        # reproducibilty of result then
+        # perform TrainTestSplit for data sampling.
        if stratify_column is not None or seed is not None:
            # Local import TrainTestSplit function.
            from teradataml.analytics.sqle import TrainTestSplit
@@ -10029,7 +10106,15 @@ class DataFrame():
                train_size=list_of_fracs[0],
                test_size=list_of_fracs[1],
                stratify_column=stratify_column,
-                seed=seed
+                seed=seed,
+                persist=True)

+            # Retrieve the table name from TrainTestSplit_out object.
+            table_name = TrainTestSplit_out.result._table_name
+
+            # Add the table to garbage collector.
+            table_added = GarbageCollector._add_to_garbagecollector(table_name)

            # Retrieve the sampled result and updated the column name and values
            # for backward compatibility.
            _sampled_df = TrainTestSplit_out.result
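Practically, a seeded or stratified sample is now computed by the TrainTestSplit SQLE function into a persisted table, which is registered with the garbage collector for later cleanup. A hedged usage sketch, assuming sample() exposes the seed and stratify_column arguments this hunk reads; the table and column names are made up:

    from teradataml import DataFrame

    df = DataFrame("titanic")  # assumed table name

    # Reproducible 80/20 split: with seed set, sampling is delegated to
    # TrainTestSplit under the hood and the result table is persisted.
    train_test = df.sample(frac=[0.8, 0.2], seed=42)

    # Stratified variant on an assumed label column.
    strat = df.sample(frac=[0.8, 0.2], stratify_column="survived", seed=42)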
@@ -16626,7 +16711,7 @@ class _TDUAF(DataFrame):
        # UAF Functions do not accept double quotes.
        db_name = UtilFuncs._extract_db_name(table_name)
        if db_name:
-            table_name = "{}.{}".format(db_name, UtilFuncs._extract_table_name(table_name))
+            table_name = '"{}"."{}"'.format(db_name, UtilFuncs._extract_table_name(table_name))
        else:
            table_name = UtilFuncs._extract_table_name(table_name)
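The effect of this change is simply to quote the database and table identifiers separately, which keeps names containing spaces, mixed case, or special characters valid; a tiny illustration with made-up identifiers:

    db_name, tbl = "My DB", "My Table"  # made-up identifiers
    print("{}.{}".format(db_name, tbl))      # before: My DB.My Table
    print('"{}"."{}"'.format(db_name, tbl))  # after:  "My DB"."My Table"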