teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (108)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +71 -0
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +51 -24
  6. teradataml/analytics/json_parser/utils.py +11 -17
  7. teradataml/automl/__init__.py +103 -48
  8. teradataml/automl/data_preparation.py +55 -37
  9. teradataml/automl/data_transformation.py +131 -69
  10. teradataml/automl/feature_engineering.py +117 -185
  11. teradataml/automl/feature_exploration.py +9 -2
  12. teradataml/automl/model_evaluation.py +13 -25
  13. teradataml/automl/model_training.py +214 -75
  14. teradataml/catalog/model_cataloging_utils.py +1 -1
  15. teradataml/clients/auth_client.py +133 -0
  16. teradataml/common/aed_utils.py +3 -2
  17. teradataml/common/constants.py +11 -6
  18. teradataml/common/garbagecollector.py +5 -0
  19. teradataml/common/messagecodes.py +3 -1
  20. teradataml/common/messages.py +2 -1
  21. teradataml/common/utils.py +6 -0
  22. teradataml/context/context.py +49 -29
  23. teradataml/data/advertising.csv +201 -0
  24. teradataml/data/bank_marketing.csv +11163 -0
  25. teradataml/data/bike_sharing.csv +732 -0
  26. teradataml/data/boston2cols.csv +721 -0
  27. teradataml/data/breast_cancer.csv +570 -0
  28. teradataml/data/customer_segmentation_test.csv +2628 -0
  29. teradataml/data/customer_segmentation_train.csv +8069 -0
  30. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  31. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  32. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  33. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  34. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  35. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  36. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  37. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  38. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  39. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  40. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  41. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  42. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  43. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  44. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  45. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  46. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  47. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  48. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  49. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  50. teradataml/data/glm_example.json +28 -1
  51. teradataml/data/housing_train_segment.csv +201 -0
  52. teradataml/data/insect2Cols.csv +61 -0
  53. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  54. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  55. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  56. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  57. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  58. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  59. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  60. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  61. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  62. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  63. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  64. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  65. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  66. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  67. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  68. teradataml/data/kmeans_example.json +5 -0
  69. teradataml/data/kmeans_table.csv +10 -0
  70. teradataml/data/onehot_encoder_train.csv +4 -0
  71. teradataml/data/openml_example.json +29 -0
  72. teradataml/data/scale_attributes.csv +3 -0
  73. teradataml/data/scale_example.json +52 -1
  74. teradataml/data/scale_input_part_sparse.csv +31 -0
  75. teradataml/data/scale_input_partitioned.csv +16 -0
  76. teradataml/data/scale_input_sparse.csv +11 -0
  77. teradataml/data/scale_parameters.csv +3 -0
  78. teradataml/data/scripts/deploy_script.py +20 -1
  79. teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
  80. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
  81. teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
  82. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  83. teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
  84. teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
  85. teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
  86. teradataml/data/teradataml_example.json +77 -0
  87. teradataml/data/ztest_example.json +16 -0
  88. teradataml/dataframe/copy_to.py +8 -3
  89. teradataml/dataframe/data_transfer.py +120 -61
  90. teradataml/dataframe/dataframe.py +102 -17
  91. teradataml/dataframe/dataframe_utils.py +47 -9
  92. teradataml/dataframe/fastload.py +272 -89
  93. teradataml/dataframe/sql.py +84 -0
  94. teradataml/dbutils/dbutils.py +2 -2
  95. teradataml/lib/aed_0_1.dll +0 -0
  96. teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
  97. teradataml/options/__init__.py +13 -4
  98. teradataml/options/configure.py +27 -6
  99. teradataml/scriptmgmt/UserEnv.py +19 -16
  100. teradataml/scriptmgmt/lls_utils.py +117 -14
  101. teradataml/table_operators/Script.py +2 -3
  102. teradataml/table_operators/TableOperator.py +58 -10
  103. teradataml/utils/validators.py +40 -2
  104. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
  105. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
  106. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
  107. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
  108. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
@@ -1618,6 +1618,8 @@ class _ArithmeticColumnExpression(ColumnExpression):
     def __sub__(self, other):
         """
         Compute the difference between two ColumnExpressions using -
+        Note:
+            * Difference between two timestamp columns return value in seconds.
 
         PARAMETERS:
             other:
@@ -1644,6 +1646,15 @@ class _ArithmeticColumnExpression(ColumnExpression):
            2   67/06/30   07/07/10   421.0   465.0   179.0
            3   67/06/30   07/07/10   434.0   485.0   185.0
            5   67/06/30   07/07/10   459.0   509.0   211.0
+           >>> load_example_data("uaf", "Convolve2RealsLeft")
+           >>> timestamp_df = DataFrame("Convolve2RealsLeft")
+           >>> timestamp_df
+              row_seq                  row_i_time  col_seq               column_i_time    A     B     C     D
+           id
+           1        1  2018-08-08 08:02:00.000000        0  2018-08-08 08:00:00.000000  1.3  10.3  20.3  30.3
+           1        1  2018-08-08 08:02:00.000000        1  2018-08-08 08:02:00.000000  1.4  10.4  20.4  30.4
+           1        0  2018-08-08 08:00:00.000000        1  2018-08-08 08:02:00.000000  1.2  10.2  20.2  30.2
+           1        0  2018-08-08 08:00:00.000000        0  2018-08-08 08:00:00.000000  1.1  10.1  20.1  30.1
 
            # Example 1: Subtract 100 from the income amount and assign the final amount
            # to new column 'remaining_income'.
@@ -1666,7 +1677,26 @@ class _ArithmeticColumnExpression(ColumnExpression):
            1   67/06/30   07/07/10   415.0   451.0   180.0   271.0
            5   67/06/30   07/07/10   459.0   509.0   211.0   298.0
            4   67/06/30   07/07/10   448.0   493.0   192.0   301.0
+
+           # Example 3: Subtract 2 timestamp columns and assign to new column 'seconds'.
+           >>> timestamp_df.assign(seconds = timestamp_df.row_i_time-timestamp_df.column_i_time)
+              row_seq                  row_i_time  col_seq               column_i_time    A     B     C     D  seconds
+           id
+           1        1  2018-08-08 08:02:00.000000        0  2018-08-08 08:00:00.000000  1.3  10.3  20.3  30.3    120.0
+           1        1  2018-08-08 08:02:00.000000        1  2018-08-08 08:02:00.000000  1.4  10.4  20.4  30.4      0.0
+           1        0  2018-08-08 08:00:00.000000        1  2018-08-08 08:02:00.000000  1.2  10.2  20.2  30.2   -120.0
+           1        0  2018-08-08 08:00:00.000000        0  2018-08-08 08:00:00.000000  1.1  10.1  20.1  30.1      0.0
+
         """
+        if isinstance(self._type, TIMESTAMP) and isinstance(other._type, TIMESTAMP):
+            s = """
+            (CAST((CAST({0} AS DATE)-CAST({1} AS DATE)) AS FLOAT) * 86400) +
+            ((EXTRACT(HOUR FROM {0}) - EXTRACT(HOUR FROM {1})) * 3600) +
+            ((EXTRACT(MINUTE FROM {0}) - EXTRACT(MINUTE FROM {1})) * 60) +
+            ((EXTRACT(SECOND FROM {0}) - EXTRACT(SECOND FROM {1})))
+            """.format(self.compile(), other.compile())
+            return _SQLColumnExpression(literal_column(s, type_ = FLOAT))
+
         expr = other.expression if isinstance(other, _SQLColumnExpression) else other
         res = _SQLColumnExpression(self.expression - expr)
         return res
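
The new TIMESTAMP branch builds the seconds difference from a whole-day delta plus hour, minute and second EXTRACT terms. A minimal standalone sketch of the same arithmetic with plain Python datetimes (the two values below are hypothetical, not taken from the diff):

    from datetime import datetime

    # Hypothetical stand-ins for two TIMESTAMP column values.
    t1 = datetime(2018, 8, 8, 8, 2, 0)
    t2 = datetime(2018, 8, 8, 8, 0, 0)

    # Same decomposition the generated SQL uses: day delta in seconds,
    # plus hour/minute/second component deltas.
    seconds = ((t1.date() - t2.date()).days * 86400
               + (t1.hour - t2.hour) * 3600
               + (t1.minute - t2.minute) * 60
               + (t1.second - t2.second))
    print(seconds)  # 120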
@@ -5437,6 +5467,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
         # eg: df1.col1, df2.col2
         self.__has_multiple_dataframes = False
         self.__names = []
+        self.alias_name = self.compile()
+
 
     @property
     def expression(self):
@@ -10088,3 +10120,55 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             return list(set(result))
 
         return []
+
+    def alias(self, name):
+        """
+        DESCRIPTION:
+            Function to returns this column with aliased name.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the column name.
+                Type: str
+
+        RAISES:
+            TypeError, ValueError
+
+        RETURNS:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "admissions_train")
+
+            # Create a DataFrame on 'admissions_train' table.
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            38     yes  2.65  Advanced    Beginner         1
+            7      yes  2.33    Novice      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            5       no  3.44    Novice      Novice         0
+            3       no  3.70    Novice    Beginner         1
+            22     yes  3.46    Novice    Beginner         0
+            24      no  1.87  Advanced      Novice         1
+            36      no  3.00  Advanced      Novice         0
+            19     yes  1.98  Advanced    Advanced         0
+            40     yes  3.95    Novice    Beginner         0
+
+            # Example 1: Alias the resultant column after aggregation with "count_program".
+            >>> res = df.agg(df.programming.count().alias("count_program"))
+            >>> res
+               count_program
+            0             40
+
+        """
+
+        # Validate argument types
+        arg_type_matrix = [["name", name , True, (str), True]]
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        self.alias_name = name
+        return self
@@ -737,7 +737,7 @@ def _check_if_python_packages_installed():
     """
     # Check if Python interpreter and add-ons packages are installed or not.
     try:
-        query = TableOperatorConstants.CHECK_PYTHON_INSTALLED.value
+        query = TableOperatorConstants.CHECK_PYTHON_INSTALLED.value.format(configure.indb_install_location)
         UtilFuncs._execute_query(query=query)
 
         # If query execution is successful, then Python and add-on packages are
@@ -841,7 +841,7 @@ def db_python_package_details(names=None):
         package_str = "grep -E \"{0}\" | ".format(package_str)
 
     query = TableOperatorConstants.PACKAGE_VERSION_QUERY.value. \
-        format(package_str, configure.default_varchar_size)
+        format(configure.indb_install_location, package_str, configure.default_varchar_size)
 
     ret_val = tdmldf.dataframe.DataFrame.from_query(query)
 
Binary file
@@ -76,7 +76,7 @@ class _GenericObjectWrapper:
         self.modelObj = None
         self._model_data = None
 
-        self._tdml_tmp_dir = os.path.join(os.path.expanduser("~"), ".teradataml")
+        self._tdml_tmp_dir = GarbageCollector._get_temp_dir_name()
 
         self._env = None
 
@@ -212,27 +212,40 @@ class _GenericObjectWrapper:
                 f"Script file '{file_name}' failed to get installed/replaced in Vantage."
             )
 
-    def _get_partition_col_indices_and_types(self, data, partition_columns):
+    def _get_data_col_types_and_partition_col_indices_and_types(self, data, partition_columns,
+                                                                idx_delim=",",
+                                                                types_delim="--"):
         """
-        partition_columns can be from feature columns and label columns.
-        So, get the indices and types of these columns from the data columns.
+        Internal function to get the data column types and partition column names, indices and types.
+        Function returns delimiter separated string of types and indices if idx_delim and
+        types_delim are provided. Otherwise, it returns list of types and indices. Partition names
+        are returned as list always.
         """
-        partition_indices = []
-        partition_types = []
+        data_column_types = "" if types_delim else []
+        partition_indices = "" if idx_delim else []
+        partition_types = "" if types_delim else []
         new_partition_columns = []
+        j = 0
         for i, col in enumerate(data.columns):
+            _type = data._td_column_names_and_sqlalchemy_types[col.lower()].python_type.__name__
+            if types_delim:
+                data_column_types += (_type if i == 0 else f"{types_delim}{_type}")
+            else:
+                data_column_types.append(_type)
             if col in partition_columns:
                 new_partition_columns.append(col)
-                partition_indices.append(i)
-                partition_types.append(data._td_column_names_and_sqlalchemy_types[col.lower()].\
-                    python_type.__name__)
-        # Converting to string "None" if they are not present as empty string can't be passed
-        # to Script script_commands' command line arguments.
-        # Otherwise, pass the values as comma separated string.
-        partition_indices = ",".join([str(x) for x in partition_indices])\
-            if partition_indices else "None"
-        partition_types = ",".join([x for x in partition_types]) if partition_types else "None"
-        return partition_indices, partition_types, new_partition_columns
+                if idx_delim:
+                    partition_indices += (str(i) if j == 0 else f"{idx_delim}{str(i)}")
+                else:
+                    partition_indices.append(i)
+                if types_delim:
+                    partition_types += (_type if j == 0 else f"{types_delim}{_type}")
+                else:
+                    partition_types.append(_type)
+                j += 1
+        # Return types of all columns (as list or str), partition column indices (as list or str)
+        # and partition column types (as list or str).
+        return data_column_types, partition_indices, partition_types, new_partition_columns
 
     def _get_kwargs_str(self, kwargs):
         """
@@ -825,15 +838,15 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         else:
             classes = str(None)
             class_type = str(None)
-
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         # db_name is applicable for enterprise system.
         db_file_name = file_name if self._is_lake_system else f"./{self._db_name}/{file_name}"
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {db_file_name} {func} {len(feature_columns)} "\
-                         f"{len(label_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {classes} {class_type} {self._is_lake_system}"
 
         # Get unique values in partitioning columns.
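
For context, the Script command line assembled here now carries the partition column indices followed by the "--"-separated types of all data columns. A hypothetical rendering of that command (every value below is invented purely for illustration):

    # Hypothetical values, only to show the shape of the composed command.
    py_exc = "tdpython3"
    db_file_name = "./alice/sklearn_fit.py"
    func = "fit"
    feature_columns = ["sepal_length", "sepal_width"]
    label_columns = ["species"]
    partition_indices_str = "3"                       # index of the partition column
    data_column_types_str = "float--float--str--int"  # types of all data columns
    model_file_name_prefix = "td_sklearn_model"
    classes, class_type, is_lake_system = str(None), str(None), False

    script_command = (f"{py_exc} {db_file_name} {func} {len(feature_columns)} "
                      f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "
                      f"{model_file_name_prefix} {classes} {class_type} {is_lake_system}")
    print(script_command)
    # tdpython3 ./alice/sklearn_fit.py fit 2 1 3 float--float--str--int td_sklearn_model None None False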
@@ -972,7 +985,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                         feature_columns,
                                         label_columns,
                                         func_name,
-                                        n_partitions,
                                         kwargs):
         """
         Internal function to return list of column names and their sqlalchemy types
@@ -1010,7 +1022,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         # For paritioning columns, it will be a dataframe and getattr(modelObj, func_name) fails.
         # Just for getting the number of columns and their types, using only one model of all.
-        if n_partitions == 1:
+        if len(self._fit_partition_unique_values) == 1:
             # Single model case.
             skl_obj = self.modelObj
         else:
@@ -1038,11 +1050,10 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                 "path() returns tuple of ndarrays of different shapes. Not Implemented yet."
             )
 
-        # This import is as per scipy version 1.10.x in local machine as teradataml does not
-        # impose restrictions on this package in setup.py. TODO
-        from scipy.sparse import csr_matrix
-
-        if isinstance(trans_opt, csr_matrix):
+        if isinstance(trans_opt, numpy.ndarray) and trans_opt.shape == (X.shape[0],):
+            trans_opt = trans_opt.reshape(X.shape[0], 1)
+
+        if type(trans_opt).__name__ in ["csr_matrix", "csc_matrix"]:
             no_of_columns = trans_opt.get_shape()[1]
             trans_opt = trans_opt.toarray()
         elif isinstance(trans_opt, dict):
@@ -1054,6 +1065,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         else:
             no_of_columns = 1
 
+        # Special handling when inverse_transform of no_of_columns returns no of rows
+        # less than the no of classes. Such columns are filled with NaN values.
+        # Updating number of columns here (new columns with NaN values will be added).
+        if func_name == "inverse_transform" and self.class_name == "MultiLabelBinarizer":
+            no_of_columns = len(self.classes_)
+            for i in range(len(ten_row_data)):
+                trans_opt[i] += tuple([numpy.nan] * (no_of_columns - len(trans_opt[i])))
+
         # Special handling required for cross_decomposition classes's transform function, which
         # takes label columns also. In this case, output is a tuple of numpy arrays - x_scores and
         # y_scores. If label columns are not provided, only x_scores are returned.
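
The MultiLabelBinarizer special case pads every inverse_transform output tuple to the number of fitted classes with NaN so each row has the same width. A minimal standalone sketch of that padding (the sample tuples are hypothetical):

    import numpy

    # Hypothetical inverse_transform output: one tuple of labels per row.
    trans_opt = [("red", "blue"), ("green",), ()]
    no_of_columns = 3  # e.g. len(classes_)

    # Pad every tuple with NaN so all rows end up with no_of_columns entries.
    for i in range(len(trans_opt)):
        trans_opt[i] += tuple([numpy.nan] * (no_of_columns - len(trans_opt[i])))

    print(trans_opt)
    # [('red', 'blue', nan), ('green', nan, nan), (nan, nan, nan)]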
@@ -1084,6 +1103,30 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get new column sqlalchemy types for pandas df columns of transform output.
         opt_pd = pd.DataFrame(trans_opt)
 
+        # Get output column types for each column in pandas df from the output of transform
+        # type functions.
+        types = {}
+        for idx, col in enumerate(list(opt_pd.columns)):
+            # Get type of column using data from all rows, in case if the column has None values.
+            # 'and' of types of all values in the column with type(None) gives the type of the column.
+            type_ = type(None)
+            for i in range(len(trans_opt)):
+                type_ = type_ and type(trans_opt[i][idx])
+
+            # If all the values of the output (trans_opt) is None, thelen use `str` as type since
+            # pandas astype() does not accept None type.
+            if type_ is type(None):
+                type_ = str
+
+            # numpy integer columns with nan values can't be typecasted using pd.astype() to int64.
+            # It raises error like "Cannot convert non-finite values (NA or inf) to integer:
+            # Error while type casting for column '2'"
+            # Hence, using pd.Int64Dtype() for integer columns with nan values.
+            types[col] = type_ if type_ != numpy.int64 else pd.Int64Dtype()
+
+        # Without this, all columns will be of object type and gets converted to VARCHAR in Vantage.
+        opt_pd = opt_pd.astype(types)
+
         # If the datatype is not specified then check if the datatype is datetime64 and timezone is present then map it to
         # TIMESTAMP(timezone=True) else map it according to default value.
         col_types = [TIMESTAMP(timezone=True)
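
The added block infers a dtype per output column and switches integer columns that contain NaN to pandas' nullable Int64 dtype, since casting non-finite values to numpy.int64 raises. A small standalone illustration of that choice (the sample data is hypothetical):

    import numpy
    import pandas as pd

    # Hypothetical transform output with an integer column containing a missing value.
    trans_opt = [(1, "a"), (numpy.nan, "b"), (3, "c")]
    opt_pd = pd.DataFrame(trans_opt)

    # astype(numpy.int64) would raise "Cannot convert non-finite values (NA or inf) to integer",
    # so use the nullable integer dtype for column 0 instead.
    opt_pd = opt_pd.astype({0: pd.Int64Dtype(), 1: str})
    print(opt_pd.dtypes.tolist())  # [Int64Dtype(), dtype('O')]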
@@ -1123,14 +1166,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         self._validate_unique_partition_values(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{len(label_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {self._is_lake_system}"
 
         # score, aic, bic returns float values.
@@ -1191,14 +1234,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         self._validate_unique_partition_values(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{len(label_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {self._is_lake_system}"
 
         # Returning feature columns also along with transformed columns because we don't know the
@@ -1212,7 +1255,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                             feature_columns,
                                             label_columns,
                                             func_name,
-                                            n_partitions,
                                             kwargs)
 
         # Installing model files before running sklearn_transform.py.
@@ -1253,7 +1295,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                                 feature_columns,
                                                 label_columns,
                                                 func_name,
-                                                len(new_partition_columns),
                                                 {})
         else:
             # If there are no label_columns, we will have only one
@@ -1263,14 +1304,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             file_name = "sklearn_fit_predict.py"
             self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
 
-            partition_indices, partition_types, new_partition_columns = \
-                self._get_partition_col_indices_and_types(data, new_partition_columns)
+            data_column_types_str, partition_indices_str, _, new_partition_columns = \
+                self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
             script_file_name = f"{file_name}" if self._is_lake_system \
                 else f"./{self._db_name}/{file_name}"
             py_exc = UtilFuncs._get_python_execution_path()
             script_command = f"{py_exc} {script_file_name} {len(feature_columns)} "\
-                             f"{len(label_columns)} {partition_indices} {partition_types} "\
+                             f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                              f"{self._model_file_name_prefix} {self._is_lake_system}"
 
             # Get unique values in partitioning columns.
@@ -1377,12 +1418,12 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         else:
             return_types += [("output", VARCHAR())]
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{partition_indices} {partition_types} {self._model_file_name_prefix} {self._is_lake_system} "\
+                         f"{partition_indices_str} {data_column_types_str} {self._model_file_name_prefix} {self._is_lake_system} "\
                          f"{args_str}"
 
         # Get unique values in partitioning columns.
@@ -1496,12 +1537,12 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in new_partition_columns] + return_types
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{len(label_columns)} {len(group_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {len(group_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {self._is_lake_system}"
 
         # Get unique values in partitioning columns.
@@ -1586,16 +1627,14 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
 
         self.__params = kwargs
 
-        # Get indices and types of partition_columns.
-        idxs, types, partition_cols = self._get_partition_col_indices_and_types(self.__tdml_df,
-                                                                                partition_cols)
+        # Get indices of partition_columns and types of all columns.
+        data_column_types_str, partition_indices_str, _, partition_cols = \
+            self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df, partition_cols)
 
         script_file_path = f"{self._model_file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{self._model_file_name}"
         py_exc = UtilFuncs._get_python_execution_path()
-        script_command = (f"{py_exc} {script_file_path} {idxs}"
-                          f" ") + \
-                         f"{types} {data_args_str}"
+        script_command = f"{py_exc} {script_file_path} {partition_indices_str} {data_column_types_str} {data_args_str}"
 
         return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in partition_cols] + [(self.__func_name, CLOB())]
@@ -1619,17 +1658,25 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
         return self.modelObj
 
     def _prepare_data_args_string(self, kwargs):
+        """
+        Get column indices and types of each data related arguments in the format:
+        "{<arg_name>-<comma separated indices>-<comma separated types>}--
+         {<arg_name>-<comma separated indices>-<comma separated types>}"
+        """
        data_args_str = []
        for arg_name in list(self.__data_args.keys()):
            # Remove DataFrame arguments from kwargs, which will be passed to Script.
            kwargs.pop(arg_name)
 
            # Get column indices and their types for each dataframe from parent dataframe.
-           _indices, _types, _ = self._get_partition_col_indices_and_types(self.__tdml_df,
-                                                                           self.__data_args[arg_name].columns)
-
-           # Format "<arg_name>-<comma separated indices>-<comma separated types>"
-           data_args_str.append(f"{arg_name}-{_indices}-{_types}")
+           _, partition_indices_str, partition_types_str, _ = \
+               self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df,
+                                                                            self.__data_args[arg_name].columns,
+                                                                            idx_delim=",",
+                                                                            types_delim=",")
+
+           # Format "<arg_name>-<comma separated indices>-<comma separated types>"
+           data_args_str.append(f"{arg_name}-{partition_indices_str}-{partition_types_str}")
 
        # Format "{<arg_name>-<comma separated indices>-<comma separated types>}--
        # {<arg_name>-<comma separated indices>-<comma separated types>}"
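
The docstring added above spells out the wire format used for data arguments. A standalone sketch that produces a string of that shape from hypothetical column metadata (argument and column names are illustrative):

    def prepare_data_args_string(parent_columns, data_args):
        # parent_columns: ordered mapping of column name -> Python type name.
        # data_args: mapping of argument name -> list of column names it uses.
        parts = []
        for arg_name, cols in data_args.items():
            indices = ",".join(str(i) for i, c in enumerate(parent_columns) if c in cols)
            types = ",".join(t for c, t in parent_columns.items() if c in cols)
            parts.append(f"{arg_name}-{indices}-{types}")
        return "--".join("{" + p + "}" for p in parts)

    parent_columns = {"id": "int", "gpa": "float", "masters": "str"}
    print(prepare_data_args_string(parent_columns, {"X": ["id", "gpa"], "y": ["masters"]}))
    # {X-0,1-int,float}--{y-2-str}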
@@ -21,9 +21,11 @@ def set_config_params(**kwargs):
        auth_token:
            Optional Parameter.
            Specifies the authentication token to connect to VantageCloud Lake.
-           Note:
-               Authentication token will expire after a specific time.
-               One can get the new authentication token and set it again.
+           Notes:
+               * Authentication token will expire after a specific time.
+                 One can get the new authentication token and set it again.
+               * if "auth_token" is set through this function, then this function
+                 should always be used only after create_context.
            Types: str
 
        ues_url:
@@ -77,6 +79,11 @@ def set_config_params(**kwargs):
            The default value is the installation location of In-DB 2.0.0 packages.
            Older versions of In-DB packages are installed at
            "/opt/teradata/languages/Python/".
+
+       local_storage:
+           Specifies the location on client where garbage collector folder will be created.
+           Types: str
+
    RETURNS:
        bool
 
@@ -93,7 +100,8 @@ def set_config_params(**kwargs):
    ...     val_install_location="VAL_USER",
    ...     read_nos_function_mapping="read_nos_fm",
    ...     write_nos_function_mapping="write_nos_fm",
-   ...     indb_install_location="/opt/teradata/languages/Python")
+   ...     indb_install_location="/opt/teradata/languages/Python",
+   ...     local_storage="/Users/gc")
    True
 
    # Example 2: Alternatively, set configuration parameters without using set_config_params() function.
@@ -106,6 +114,7 @@ def set_config_params(**kwargs):
    >>> configure.read_nos_function_mapping="read_nos_fm"
    >>> configure.write_nos_function_mapping="write_nos_fm"
    >>> configure.indb_install_location="/opt/teradata/languages/Python"
+   >>> configure.local_storage = "/Users/gc/"
    """
    for option in kwargs:
        try:
@@ -13,7 +13,6 @@ from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.messages import Messages
 from teradataml.common.messagecodes import MessageCodes
 
-
 class _ConfigureSuper(object):
 
     def __init__(self):
@@ -58,6 +57,7 @@ class _Configure(_ConfigureSuper):
     inline_plot = _create_property('inline_plot')
     indb_install_location = _create_property('indb_install_location')
     openml_user_env = _create_property('openml_user_env')
+    local_storage = _create_property('local_storage')
 
     def __init__(self, default_varchar_size=1024, column_casesensitive_handler = False,
                  vantage_version="vantage1.1", val_install_location=None,
@@ -66,7 +66,7 @@ class _Configure(_ConfigureSuper):
                  read_nos_function_mapping="read_nos", write_nos_function_mapping="write_nos",
                  cran_repositories=None, inline_plot=True,
                  indb_install_location="/var/opt/teradata/languages/sles12sp3/Python/",
-                 openml_user_env=None):
+                 openml_user_env=None, local_storage=None):
 
         """
         PARAMETERS:
@@ -163,6 +163,13 @@ class _Configure(_ConfigureSuper):
                    # Set the environment to be used for OpenML.
                    _env_name = "OpenAF" # Name of the user defined environment.
                    teradataml.options.configure.openml_user_env = get_env(_env_name)
+
+           local_storage:
+               Specifies the location on client where garbage collector folder will be created.
+               Types: string
+               Example:
+                   # Set the garbage collector location to "/Users/gc/"
+                   teradataml.options.configure.local_storage = "/Users/gc/"
        """
        super().__init__()
        super().__setattr__('default_varchar_size', default_varchar_size)
@@ -179,6 +186,7 @@ class _Configure(_ConfigureSuper):
        super().__setattr__('inline_plot', True)
        super().__setattr__('indb_install_location', indb_install_location)
        super().__setattr__('openml_user_env', openml_user_env)
+       super().__setattr__('local_storage', local_storage)
 
        # internal configurations
        # These configurations are internal and should not be
@@ -221,6 +229,12 @@ class _Configure(_ConfigureSuper):
        super().__setattr__('_oauth_client_id', None)
        # Internal parameter, that is used for specifying the Authentication token expiry time.
        super().__setattr__('_auth_token_expiry_time', None)
+       # Internal parameter, that is used for specifying the OAuth authentication.
+       super().__setattr__('_oauth', None)
+       # Internal parameter, that is used for specifying the current database associated with current connection.
+       super().__setattr__('_current_database_name', None)
+       # Internal parameter, that is used for specifying the database username associated with current connection.
+       super().__setattr__('_database_username', None)
 
    def __setattr__(self, name, value):
        if hasattr(self, name):
@@ -243,7 +257,7 @@ class _Configure(_ConfigureSuper):
                                                               "greater than or equal to"),
                                          MessageCodes.TDMLDF_POSITIVE_INT)
        elif name in ['column_casesensitive_handler', '_validate_metaexpression',
-                     '_validate_gc', 'inline_plot']:
+                     '_validate_gc', 'inline_plot', '_oauth']:
 
            if not isinstance(value, bool):
                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
@@ -283,15 +297,21 @@ class _Configure(_ConfigureSuper):
                                                               "a value in {}".format(valid_versions)),
                                          MessageCodes.INVALID_ARG_VALUE)
 
-       elif name in ['val_install_location', 'byom_install_location', 'database_version',
+       elif name in ['val_install_location', 'byom_install_location',
                     'read_nos_function_mapping', 'write_nos_function_mapping',
                     '_byom_model_catalog_database', '_byom_model_catalog_table',
                     '_byom_model_catalog_license', '_byom_model_catalog_license_source',
-                    'indb_install_location']:
+                    'indb_install_location', 'local_storage']:
            if not isinstance(value, str):
                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
                                                               'str'),
                                          MessageCodes.UNSUPPORTED_DATATYPE)
+           if name == 'local_storage':
+               # Validate if path exists.
+               if not os.path.exists(value):
+                   raise TeradataMlException(
+                       Messages.get_message(MessageCodes.PATH_NOT_FOUND).format(value),
+                       MessageCodes.PATH_NOT_FOUND)
 
        elif name in {'ues_url', '_oauth_end_point', '_oauth_client_id'}:
 
@@ -305,7 +325,8 @@ class _Configure(_ConfigureSuper):
            value = value[: -1] if value.endswith("/") else value
 
        elif name in ['temp_table_database', 'temp_view_database',
-                    "_byom_model_catalog_license_table", "_byom_model_catalog_license_database"]:
+                    "_byom_model_catalog_license_table", "_byom_model_catalog_license_database",
+                    "_current_database_name", "_database_username", "database_version"]:
            if not isinstance(value, str) and not isinstance(value, type(None)):
                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
                                                               'str or None'),
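
The new 'local_storage' option goes through the same string validation as the other location options, plus an existence check on the path (a missing path raises PATH_NOT_FOUND). A small usage sketch, with an illustrative path:

    import os
    from teradataml import configure

    path = "/Users/gc"              # illustrative location for the garbage collector folder
    if os.path.exists(path):        # the same check the option setter performs
        configure.local_storage = path
    else:
        print(f"{path} does not exist; assigning it would raise a TeradataMlException")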
@@ -23,6 +23,7 @@ from teradataml import configure
 from teradataml.utils.internal_buffer import _InternalBuffer
 from concurrent.futures import ThreadPoolExecutor, wait
 from teradataml.clients.pkce_client import _DAWorkflow
+from teradataml.clients.auth_client import _AuthWorkflow
 from teradataml.context.context import _get_user
 from teradataml.common.constants import HTTPRequest, CloudProvider
 from teradataml.common.exceptions import TeradataMlException
@@ -219,22 +220,24 @@ def _get_auth_token():
        >>>_get_auth_token()
    """
    # Check the current time. If token is expiring, get another one from refresh token.
-    if configure._auth_token_expiry_time and time.time() > configure._auth_token_expiry_time:
-        # Extract the base URL from "ues_url".
-        ues_url = configure.ues_url
-        client_id = configure._oauth_client_id
-
-        url_parser = urlparse(ues_url)
-        base_url = "{}://{}".format(url_parser.scheme, url_parser.netloc)
-
-        # Get the JWT Token details.
-        da_wf = _DAWorkflow(base_url, client_id)
-        token_data = da_wf._get_token_data()
-
-        # Replace the options with new values.
-        configure._auth_token_expiry_time = time.time() + token_data["expires_in"] - 15
-        # Store the jwt token in internal class attribute.
-        _InternalBuffer.add(auth_token=_AuthToken(token=token_data["access_token"]))
+    if configure._oauth:
+        if configure._auth_token_expiry_time and time.time() > configure._auth_token_expiry_time:
+            # Extract the base URL from "ues_url".
+            ues_url = configure.ues_url
+            client_id = configure._oauth_client_id
+
+            url_parser = urlparse(ues_url)
+            base_url = "{}://{}".format(url_parser.scheme, url_parser.netloc)
+
+            # Get the JWT Token details.
+            da_wf = _DAWorkflow(base_url, client_id)
+            token_data = da_wf._get_token_data()
+
+            # Replace the options with new values.
+            configure._auth_token_expiry_time = time.time() + token_data["expires_in"] - 15
+
+            # Store the jwt token in internal class attribute.
+            _InternalBuffer.add(auth_token=_AuthToken(token=token_data["access_token"]))
 
    return {"Authorization": "Bearer {}".format(_InternalBuffer.get("auth_token").value)}
 
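
With this change a JWT is refreshed only when OAuth is in use and the cached token is close to expiry; the expiry timestamp is recorded 15 seconds early so the refresh happens before the token actually lapses. A simplified, self-contained sketch of that bookkeeping (names other than the 15-second margin are invented; in teradataml the first token is obtained by the PKCE workflow, not by this helper):

    import time

    class TokenCache:
        # Minimal sketch of the expiry handling in _get_auth_token().
        def __init__(self):
            self.token = None
            self.expiry_time = None   # plays the role of configure._auth_token_expiry_time

        def get(self, fetch_token_data):
            # fetch_token_data() stands in for _DAWorkflow._get_token_data() and
            # returns {"access_token": ..., "expires_in": seconds}.
            if self.expiry_time is None or time.time() > self.expiry_time:
                data = fetch_token_data()
                # Record expiry 15 seconds early so the token is replaced before it lapses.
                self.expiry_time = time.time() + data["expires_in"] - 15
                self.token = data["access_token"]
            return {"Authorization": "Bearer {}".format(self.token)}

    cache = TokenCache()
    print(cache.get(lambda: {"access_token": "abc123", "expires_in": 3600}))
    # {'Authorization': 'Bearer abc123'}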