teradataml-20.0.0.0-py3-none-any.whl → teradataml-20.0.0.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

Files changed (108)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +71 -0
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +51 -24
  6. teradataml/analytics/json_parser/utils.py +11 -17
  7. teradataml/automl/__init__.py +103 -48
  8. teradataml/automl/data_preparation.py +55 -37
  9. teradataml/automl/data_transformation.py +131 -69
  10. teradataml/automl/feature_engineering.py +117 -185
  11. teradataml/automl/feature_exploration.py +9 -2
  12. teradataml/automl/model_evaluation.py +13 -25
  13. teradataml/automl/model_training.py +214 -75
  14. teradataml/catalog/model_cataloging_utils.py +1 -1
  15. teradataml/clients/auth_client.py +133 -0
  16. teradataml/common/aed_utils.py +3 -2
  17. teradataml/common/constants.py +11 -6
  18. teradataml/common/garbagecollector.py +5 -0
  19. teradataml/common/messagecodes.py +3 -1
  20. teradataml/common/messages.py +2 -1
  21. teradataml/common/utils.py +6 -0
  22. teradataml/context/context.py +49 -29
  23. teradataml/data/advertising.csv +201 -0
  24. teradataml/data/bank_marketing.csv +11163 -0
  25. teradataml/data/bike_sharing.csv +732 -0
  26. teradataml/data/boston2cols.csv +721 -0
  27. teradataml/data/breast_cancer.csv +570 -0
  28. teradataml/data/customer_segmentation_test.csv +2628 -0
  29. teradataml/data/customer_segmentation_train.csv +8069 -0
  30. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  31. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  32. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  33. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  34. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  35. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  36. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  37. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  38. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  39. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  40. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  41. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  42. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  43. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  44. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  45. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  46. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  47. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  48. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  49. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  50. teradataml/data/glm_example.json +28 -1
  51. teradataml/data/housing_train_segment.csv +201 -0
  52. teradataml/data/insect2Cols.csv +61 -0
  53. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  54. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  55. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  56. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  57. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  58. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  59. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  60. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  61. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  62. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  63. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  64. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  65. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  66. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  67. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  68. teradataml/data/kmeans_example.json +5 -0
  69. teradataml/data/kmeans_table.csv +10 -0
  70. teradataml/data/onehot_encoder_train.csv +4 -0
  71. teradataml/data/openml_example.json +29 -0
  72. teradataml/data/scale_attributes.csv +3 -0
  73. teradataml/data/scale_example.json +52 -1
  74. teradataml/data/scale_input_part_sparse.csv +31 -0
  75. teradataml/data/scale_input_partitioned.csv +16 -0
  76. teradataml/data/scale_input_sparse.csv +11 -0
  77. teradataml/data/scale_parameters.csv +3 -0
  78. teradataml/data/scripts/deploy_script.py +20 -1
  79. teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
  80. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
  81. teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
  82. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  83. teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
  84. teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
  85. teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
  86. teradataml/data/teradataml_example.json +77 -0
  87. teradataml/data/ztest_example.json +16 -0
  88. teradataml/dataframe/copy_to.py +8 -3
  89. teradataml/dataframe/data_transfer.py +120 -61
  90. teradataml/dataframe/dataframe.py +102 -17
  91. teradataml/dataframe/dataframe_utils.py +47 -9
  92. teradataml/dataframe/fastload.py +272 -89
  93. teradataml/dataframe/sql.py +84 -0
  94. teradataml/dbutils/dbutils.py +2 -2
  95. teradataml/lib/aed_0_1.dll +0 -0
  96. teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
  97. teradataml/options/__init__.py +13 -4
  98. teradataml/options/configure.py +27 -6
  99. teradataml/scriptmgmt/UserEnv.py +19 -16
  100. teradataml/scriptmgmt/lls_utils.py +117 -14
  101. teradataml/table_operators/Script.py +2 -3
  102. teradataml/table_operators/TableOperator.py +58 -10
  103. teradataml/utils/validators.py +40 -2
  104. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
  105. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
  106. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
  107. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
  108. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
teradataml/LICENSE-3RD-PARTY.pdf CHANGED
Binary file
teradataml/LICENSE.pdf CHANGED
Binary file
teradataml/README.md CHANGED
@@ -16,6 +16,77 @@ Copyright 2024, Teradata. All Rights Reserved.
   * [License](#license)
 
 ## Release Notes:
+#### teradataml 20.00.00.01
+* teradataml no longer supports Python versions less than 3.8.
+
+* ##### New Features/Functionality
+  * ##### Personal Access Token (PAT) support in teradataml
+    * `set_auth_token()` - teradataml now supports authentication via PAT in addition to
+      the OAuth 2.0 Device Authorization Grant (formerly known as the Device Flow).
+    * It accepts the UES URL, a Personal Access Token (PAT) and a private key file generated from the
+      VantageCloud Lake Console, along with the optional arguments `username` and `expiration_time` in seconds.
+
+* ##### Updates
+  * ##### teradataml: SQLE Engine Analytic Functions
+    * `ANOVA()`
+      * New arguments added: `group_name_column`, `group_value_name`, `group_names` and `num_groups` for data containing group values and group names.
+    * `FTest()`
+      * New arguments added: `sample_name_column`, `sample_name_value`, `first_sample_name` and `second_sample_name`.
+    * `GLM()`
+      * Supports stepwise regression and accepts the new arguments `stepwise_direction`, `max_steps_num` and `initial_stepwise_columns`.
+      * New arguments added: `attribute_data`, `parameter_data`, `iteration_mode` and `partition_column`.
+    * `GetFutileColumns()`
+      * Arguments `category_summary_column` and `threshold_value` are now optional.
+    * `KMeans()`
+      * New argument added: `initialcentroids_method`.
+    * `NonLinearCombineFit()`
+      * Argument `result_column` is now optional.
+    * `ROC()`
+      * Argument `positive_class` is now optional.
+    * `SVMPredict()`
+      * New argument added: `model_type`.
+    * `ScaleFit()`
+      * New arguments added: `ignoreinvalid_locationscale`, `unused_attributes`, `attribute_name_column` and `attribute_value_column`.
+      * Arguments `attribute_name_column`, `attribute_value_column` and `target_attributes` are supported for sparse input.
+      * Arguments `attribute_data`, `parameter_data` and `partition_column` are supported for partitioning.
+    * `ScaleTransform()`
+      * New arguments `attribute_name_column` and `attribute_value_column` added to support sparse input.
+    * `TDGLMPredict()`
+      * New arguments added: `family` and `partition_column`.
+    * `XGBoost()`
+      * New argument `base_score` added to set the initial prediction value for all data points.
+    * `XGBoostPredict()`
+      * New argument `detailed` added to report detailed information for each prediction.
+    * `ZTest()`
+      * New arguments added: `sample_name_column`, `sample_value_column`, `first_sample_name` and `second_sample_name`.
+  * ##### teradataml: AutoML
+    * `AutoML()`, `AutoRegressor()` and `AutoClassifier()`
+      * New argument `max_models` added as an early stopping criterion that limits the maximum number of models to be trained.
+  * ##### teradataml: DataFrame functions
+    * `DataFrame.agg()`
+      * Accepts a ColumnExpression or a list of ColumnExpressions as arguments.
+  * ##### teradataml: General Functions
+    * Data Transfer Utility
+      * `fastload()` - Improved error and warning table handling with the new arguments listed below.
+        * `err_staging_db`
+        * `err_tbl_name`
+        * `warn_tbl_name`
+        * `err_tbl_1_suffix`
+        * `err_tbl_2_suffix`
+      * `fastload()` - Changed behaviour of the `save_errors` argument.
+        When `save_errors` is set to `True`, error information is made available in two persistent tables, `ERR_1` and `ERR_2`.
+        When `save_errors` is set to `False`, error information is made available in a single pandas DataFrame.
+    * The garbage collector location is now configurable.
+      Users can set `configure.local_storage` to a desired location.
+
+* ##### Bug Fixes
+  * UAF functions now work if the database name has special characters.
+  * OpensourceML can now read and process NULL/nan values.
+  * Boolean output values are now returned as a VARBYTE column with 0 or 1 values in OpensourceML.
+  * Fixed a bug in `Apply`'s `deploy()`.
+  * Fixed an issue with volatile table creation: the table is now created in the correct database, i.e., the user's spool space, regardless of the temp database specified.
+  * The `ColumnTransformer` function now processes its arguments in the order they are passed.
+
 #### teradataml 20.00.00.00
 * ##### New Features/Functionality
   * ###### teradataml OpenML: Run Opensource packages through Teradata Vantage
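The PAT flow described in the release notes above can be sketched as follows. `create_context()` and `set_auth_token()` are real teradataml entry points, but the PAT-related keyword names (`pat_token`, `pem_file`) and all values are assumptions drawn from the note, not a verified signature; check them against the 20.00.00.01 documentation.

```python
# Hypothetical PAT authentication sketch for teradataml 20.00.00.01.
from teradataml import create_context, set_auth_token

# Connect to Vantage first.
create_context(host="<host>", username="<user>", password="<password>")

# Authenticate against the User Environment Service (UES) with a PAT
# instead of the OAuth 2.0 Device Authorization Grant.
set_auth_token(
    ues_url="<UES URL from the VantageCloud Lake Console>",
    pat_token="<personal access token>",  # assumed keyword; PAT from the Console
    pem_file="private_key.pem",           # assumed keyword; key file from the Console
    username="<user>",                    # optional per the note
    expiration_time=3600                  # optional; seconds
)
```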
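The new `max_models` argument acts as an early-stopping cap on the AutoML search. A minimal sketch, assuming a `titanic` table with a `survived` target column (both placeholders):

```python
from teradataml import AutoClassifier, DataFrame

df = DataFrame("titanic")  # placeholder table

# max_models (new in 20.00.00.01) stops the search after at most 5 trained models.
clf = AutoClassifier(verbose=1, max_models=5)
clf.fit(df, "survived")    # placeholder target column
clf.leaderboard()          # inspect the trained models
```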
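`DataFrame.agg()` previously took aggregate names as strings or a dict; per the note it now also accepts ColumnExpressions. A short sketch, assuming the `sales` example table with `Jan`/`Feb`/`Mar` columns:

```python
from teradataml import DataFrame

df = DataFrame("sales")  # placeholder table

# Single ColumnExpression, or a list of them.
df.agg(df.Feb.sum())
df.agg([df.Feb.sum(), df.Jan.min(), df.Mar.max()])
```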
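The `fastload()` argument names below come straight from the release note; the surrounding call shape and defaults are assumptions to be verified against the `fastload()` documentation. The `configure.local_storage` setting is the configurable garbage-collector location mentioned above.

```python
import pandas as pd
from teradataml import configure, fastload

# Garbage collector location is now configurable.
configure.local_storage = "/tmp/teradataml_gc"   # any writable path

pdf = pd.DataFrame({"id": [1, 2, 3], "val": ["a", "b", "c"]})

# With save_errors=True, error information is kept in two persistent
# ERR_1/ERR_2 tables; the new arguments control their names and location.
fastload(
    df=pdf,
    table_name="demo_stage",
    save_errors=True,
    err_staging_db="err_db",          # database for error/warning tables
    err_tbl_name="demo_stage_err",    # base error-table name
    warn_tbl_name="demo_stage_warn",  # warning-table name
    err_tbl_1_suffix="_e1",           # suffix for the first error table
    err_tbl_2_suffix="_e2"            # suffix for the second error table
)
```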
teradataml/_version.py CHANGED
@@ -1,6 +1,6 @@
 # ##################################################################
 #
-# Copyright 2021 Teradata. All rights reserved.
+# Copyright 2024 Teradata. All rights reserved.
 # TERADATA CONFIDENTIAL AND TRADE SECRET
 #
 # Primary Owner: Pankaj Purandare (PankajVinod.Purandare@teradata.com)
@@ -8,4 +8,4 @@
 #
 # ##################################################################
 
-version = "20.00.00.00"
+version = "20.00.00.01"
teradataml/analytics/analytic_function_executor.py CHANGED
@@ -86,6 +86,9 @@ class _AnlyticFunctionExecutor:
         # Initialize FuncSpecialCaseHandler.
         self._spl_func_obj = FuncSpecialCaseHandler(self.func_name)
 
+        # Initialize database object type.
+        self.db_object_type = TeradataConstants.TERADATA_VIEW
+
     @staticmethod
     def _validate_analytic_function_argument(func_arg_name, func_arg_value, argument, additional_valid_types=None):
         """
@@ -178,7 +181,7 @@ class _AnlyticFunctionExecutor:
 
         EXAMPLES:
             self._execute_query()
-            """
+        """
         # Generate STDOUT table name and add it to the output table list.
         func_params = self._get_generate_temp_table_params(persist=persist, volatile=volatile)
         sqlmr_stdout_temp_tablename = UtilFuncs._generate_temp_table_name(**func_params)
@@ -248,25 +251,18 @@ class _AnlyticFunctionExecutor:
             self._get_generate_temp_table_params(True, True)
         """
         use_default_database = True
-        db_object_type = TeradataConstants.TERADATA_VIEW
         prefix = "td_sqlmr_out_"
         gc_on_quit = True
 
-        # If function produces output tables, i.e., function has output table arguments,
-        # the 'db_object_type' should be "table" or if analytic function does not support
-        # reading from a view created on output, then 'db_object_type' should be "table".
-        if len(self._metadata.output_tables) > 0 or not self._metadata._is_view_supported:
-            db_object_type = TeradataConstants.TERADATA_TABLE
-
-        # If result is to be persisted or if the table is a volatile table then, db_object_type
-        # should be "table" and it must not be Garbage collected.
+        # If result is to be persisted or if the table is a volatile table then,
+        # it must not be Garbage collected.
         if persist or volatile:
             gc_on_quit = False
-            db_object_type = TeradataConstants.TERADATA_TABLE
             prefix = "td_sqlmr_{}_out_".format("persist" if persist else "volatile")
+            use_default_database = False if volatile else True
 
         return {"use_default_database": use_default_database,
-                "table_type": db_object_type,
+                "table_type": self.db_object_type,
                 "prefix": prefix,
                 "gc_on_quit": gc_on_quit}
 
@@ -694,10 +690,26 @@ class _AnlyticFunctionExecutor:
                                               MessageCodes.CANNOT_USE_TOGETHER_WITH)
 
         self._dyn_cls_data_members.update(kwargs)
-
+
+        # If function produces output tables, i.e., function has output table arguments,
+        # then 'db_object_type' should be "table", or if analytic function does not support
+        # reading from a view created on output, then 'db_object_type' should be "table".
+        # If result is to be persisted or if the table is a volatile table then, db_object_type
+        # should be "table", else it should be "view".
+        self.db_object_type = (
+            TeradataConstants.TERADATA_VOLATILE_TABLE if volatile
+            else TeradataConstants.TERADATA_TABLE if len(self._metadata.output_tables) > 0
+                or not self._metadata._is_view_supported or persist
+            else TeradataConstants.TERADATA_VIEW
+        )
         if not skip_input_arg_processing:
             self._process_input_argument(**kwargs)
 
+        # Check if func_name is GLM or TDGLMPredict and data_partition_column,
+        # data_hash_column or local_order_data are passed.
+        if self.func_name in ['GLM', 'TDGLMPredict'] and \
+                any(key in kwargs for key in ['data_partition_column', 'data_hash_column', 'local_order_data']):
+            skip_output_arg_processing = True
+
         if not skip_output_arg_processing:
             self._process_output_argument(**kwargs)
 
@@ -856,22 +868,34 @@ class _SQLEFunctionExecutor(_AnlyticFunctionExecutor):
         EXAMPLES:
             self._get_input_args()
         """
+        sort_order = list(kwargs.keys())
+        input_table_dict = {}
+
         for _inp_attribute in self._metadata.input_tables:
             input_table_arg = _inp_attribute.get_lang_name()
-            yield input_table_arg, _inp_attribute
 
-            # Check if SQLE function allows multiple values as input.
+            # Store the first argument directly into the dictionary.
+            input_table_dict[input_table_arg] = _inp_attribute
+
+            # Check if SQL function allows multiple values as input.
             if _inp_attribute.allows_lists():
                 _index = 1
                 while True:
                     _input_table_arg = "{}{}".format(input_table_arg, _index)
-                    # If the corresponding object is available in kwargs, then extract it.
-                    # Otherwise, stop looking for multiple arguments and proceed for next attribute.
                     if _input_table_arg in kwargs:
-                        yield _input_table_arg, _inp_attribute
-                        _index = _index + 1
+                        input_table_dict[_input_table_arg] = _inp_attribute
+                        _index += 1
                     else:
                         break
+
+        # For ColumnTransformer, yield the input arguments in the order they are passed.
+        if self.func_name == "ColumnTransformer":
+            for key in sort_order:
+                if key in input_table_dict:
+                    yield key, input_table_dict[key]
+        else:
+            for key in input_table_dict:
+                yield key, input_table_dict[key]
 
     def _process_input_argument(self, **kwargs):
         """
@@ -1707,14 +1731,17 @@ class _UAFFunctionExecutor(_SQLEFunctionExecutor):
             self._get_generate_temp_table_params(True, True)
         """
         prefix = "td_uaf_out_"
-
+        gc_on_quit = True
         # If result is to be persisted then, it must not be Garbage collected.
-        gc_on_quit = False if persist or volatile else True
+        if persist or volatile:
+            gc_on_quit = False
+            prefix = "td_uaf_{}_out_".format("persist" if persist else "volatile")
 
-        return {"table_type": TeradataConstants.TERADATA_TABLE,
+        return {"table_type": self.db_object_type,
                 "prefix": prefix,
                 "gc_on_quit": gc_on_quit,
-                "databasename": output_db if output_db else _get_context_temp_databasename()}
+                "databasename": output_db if output_db else _get_context_temp_databasename(
+                    table_type=self.db_object_type)}
 
     def _process_output_argument(self, **kwargs):
         """
@@ -1762,7 +1789,7 @@ class _UAFFunctionExecutor(_SQLEFunctionExecutor):
         # If database name is not provided by user, get the default database name
         # else use user provided database name.
         db_name = output_db_name if output_db_name is not None else \
-            _get_context_temp_databasename()
+            _get_context_temp_databasename(table_type=self.db_object_type)
 
         # Get the fully qualified table name.
         table_name = "{}.{}".format(UtilFuncs._teradata_quote_arg(db_name,
teradataml/analytics/json_parser/utils.py CHANGED
@@ -608,12 +608,16 @@ class _Evaluate:
         if self.get_function_name() == "NaiveBayesTextClassifierTrainer":
             return True
         # name of argument is model_type for most of the functions but for some it is different
-        if "model_type" not in kwargs:
+        if "model_type" not in kwargs and "tree_type" not in kwargs:
             arg_name = self.get_arg_name()
             model_type = getattr(self.obj, arg_name)
-            kwargs["model_type"] = model_type
+            if self.get_function_name() == "DecisionForest":
+                kwargs["tree_type"] = model_type
+            else:
+                kwargs["model_type"] = model_type
 
-        if kwargs["model_type"].lower() == "binomial" or kwargs["model_type"].lower() == "classification":
+        if ("model_type" in kwargs and (kwargs["model_type"].lower() == "binomial" or kwargs["model_type"].lower() == "classification")) \
+                or ("tree_type" in kwargs and kwargs["tree_type"].lower() == "classification"):
             is_classification_model = True
 
         return is_classification_model
@@ -720,20 +724,10 @@ class _Evaluate:
             kwargs["observation_column"] = response_column
             kwargs["prediction_column"] = "Prediction" if "Prediction" in predict.result.columns else "prediction"
 
-            # get the column_names and types from the metaexpr to check if the datatype of prediction column
-            # and observation column is same or not.
-            col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(predict.result._metaexpr)
-            res = dict(zip(col_names, col_types))
-            pre_col_name = kwargs["prediction_column"]
-            if res[kwargs["observation_column"]] != res[pre_col_name]:
-                # Converting the prediction column datatype to observation column datatype.
-                cast_cols_pre = {pre_col_name: getattr(predict.result, pre_col_name).expression.cast(
-                    type_=res[kwargs["observation_column"]])}
-                # Update the predicted result dataframe.
-                predict.result = predict.result.assign(**cast_cols_pre)
-
-            # Update the num_labels by the number of unique values.
-            kwargs["num_labels"] = predict.result.drop_duplicate(kwargs["observation_column"]).shape[0]
+            # Update the num_labels by the number of unique values if
+            # labels are not passed.
+            if "labels" not in kwargs:
+                kwargs["num_labels"] = predict.result.drop_duplicate(kwargs["observation_column"]).shape[0]
 
             kwargs["data"] = predict.result