teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +196 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +79 -4
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +1 -0
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/automl/data_preparation.py +3 -2
- teradataml/automl/feature_engineering.py +15 -7
- teradataml/automl/model_training.py +39 -33
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +35 -0
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +8 -2
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +25 -3
- teradataml/common/utils.py +134 -9
- teradataml/context/context.py +20 -10
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/dataframe.py +543 -175
- teradataml/dataframe/functions.py +553 -25
- teradataml/dataframe/sql.py +184 -15
- teradataml/dbutils/dbutils.py +556 -18
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
- teradataml/options/__init__.py +7 -23
- teradataml/options/configure.py +29 -3
- teradataml/scriptmgmt/UserEnv.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +74 -21
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +33 -1
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/LICENSE-3RD-PARTY.pdf
CHANGED

Binary file
teradataml/README.md
CHANGED

@@ -16,6 +16,187 @@ Copyright 2024, Teradata. All Rights Reserved.
 * [License](#license)
 
 ## Release Notes:
+
+#### teradataml 20.00.00.03
+
+* teradataml no longer supports setting the `auth_token` using `set_config_params()`. Users should use `set_auth_token()` to set the token.
+
+* ##### New Features/Functionality
+  * ###### teradataml: DataFrame
+    * New Function
+      * `alias()` - Creates a DataFrame with an alias name.
+    * New Property
+      * `db_object_name` - Gets the underlying database object name on which the DataFrame is created.
+
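A minimal sketch of the new `alias()` function and `db_object_name` property listed above; the table name is illustrative:

```python
from teradataml import DataFrame

df = DataFrame("sales")      # assumes a table named "sales" exists
df_alias = df.alias("s1")    # new DataFrame that carries the alias "s1"
print(df.db_object_name)     # underlying database object the DataFrame is built on
```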
+  * ###### teradataml: GeoDataFrame
+    * New Function
+      * `alias()` - Creates a GeoDataFrame with an alias name.
+
+  * ###### teradataml: DataFrameColumn a.k.a. ColumnExpression
+    * _Arithmetic Functions_
+      * `DataFrameColumn.isnan()` - Evaluates the expression to determine whether the floating-point argument is a NaN (Not-a-Number) value.
+      * `DataFrameColumn.isinf()` - Evaluates the expression to determine whether the floating-point argument is an infinite number.
+      * `DataFrameColumn.isfinite()` - Evaluates the expression to determine whether it is a finite floating-point value.
+
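A hedged sketch of the three new ColumnExpression functions above, assuming a hypothetical table with a FLOAT column `reading`:

```python
from teradataml import DataFrame

df = DataFrame("sensor_readings")  # hypothetical table
df = df.assign(nan_flag=df.reading.isnan(),
               inf_flag=df.reading.isinf(),
               finite_flag=df.reading.isfinite())
```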
+  * ###### FeatureStore - handles feature management within the Vantage environment
+    * FeatureStore Components
+      * Feature - Represents a feature which is used in ML modeling.
+      * Entity - Represents the columns that uniquely identify the data used in ML modeling.
+      * DataSource - Represents the source of data.
+      * FeatureGroup - Collection of Feature, Entity and DataSource.
+        * Methods
+          * `apply()` - Adds Feature, Entity, DataSource to a FeatureGroup.
+          * `from_DataFrame()` - Creates a FeatureGroup from a teradataml DataFrame.
+          * `from_query()` - Creates a FeatureGroup using a SQL query.
+          * `remove()` - Removes Feature, Entity, or DataSource from a FeatureGroup.
+          * `reset_labels()` - Removes the labels assigned to the FeatureGroup that were set using `set_labels()`.
+          * `set_labels()` - Sets the Features as labels for a FeatureGroup.
+        * Properties
+          * `features` - Get the features of a FeatureGroup.
+          * `labels` - Get the labels of a FeatureGroup.
+    * FeatureStore
+      * Methods
+        * `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
+        * `archive_data_source()` - Archives a specified DataSource from a FeatureStore.
+        * `archive_entity()` - Archives a specified Entity from a FeatureStore.
+        * `archive_feature()` - Archives a specified Feature from a FeatureStore.
+        * `archive_feature_group()` - Archives a specified FeatureGroup from a FeatureStore. The method also archives the underlying Feature, Entity, and DataSource.
+        * `delete_data_source()` - Deletes an archived DataSource.
+        * `delete_entity()` - Deletes an archived Entity.
+        * `delete_feature()` - Deletes an archived Feature.
+        * `delete_feature_group()` - Deletes an archived FeatureGroup.
+        * `get_data_source()` - Get the DataSources associated with the FeatureStore.
+        * `get_dataset()` - Get a teradataml DataFrame based on the Features, Entities and DataSource of a FeatureGroup.
+        * `get_entity()` - Get the Entity associated with the FeatureStore.
+        * `get_feature()` - Get the Feature associated with the FeatureStore.
+        * `get_feature_group()` - Get the FeatureGroup associated with the FeatureStore.
+        * `list_data_sources()` - List DataSources.
+        * `list_entities()` - List Entities.
+        * `list_feature_groups()` - List FeatureGroups.
+        * `list_features()` - List Features.
+        * `list_repos()` - List available repos which are configured for FeatureStore.
+        * `repair()` - Repairs the underlying FeatureStore schema on the database.
+        * `set_features_active()` - Marks the Features as active.
+        * `set_features_inactive()` - Marks the Features as inactive.
+        * `setup()` - Sets up the FeatureStore for a repo.
+      * Properties
+        * `repo` - Property for the FeatureStore repo.
+        * `grant` - Property to grant access on the FeatureStore to a user.
+        * `revoke` - Property to revoke access on the FeatureStore from a user.
+
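An illustrative FeatureStore round trip based on the methods listed above; repo, table, and column names are placeholders, and the exact argument shapes may differ from the shipped API:

```python
from teradataml import DataFrame, FeatureStore, FeatureGroup

fs = FeatureStore(repo="sales_repo")   # placeholder repo name
fs.setup()                             # create the FeatureStore schema for the repo

df = DataFrame("sales")                # placeholder source table
fg = FeatureGroup.from_DataFrame(name="sales_fg", df=df, entity_columns="txn_id")
fs.apply(fg)                           # register Feature, Entity, DataSource and FeatureGroup
ds = fs.get_dataset("sales_fg")        # teradataml DataFrame built from the FeatureGroup
```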
+  * ###### teradataml: Table Operator Functions
+    * `Image2Matrix()` - Converts an image into a matrix.
+
+  * ###### teradataml: SQLE Engine Analytic Functions
+    * New Analytics Database Analytic Functions:
+      * `CFilter()`
+      * `NaiveBayes()`
+      * `TDNaiveBayesPredict()`
+      * `Shap()`
+      * `SMOTE()`
+
+  * ###### teradataml: Unbounded Array Framework (UAF) Functions
+    * New Unbounded Array Framework (UAF) Functions:
+      * `CopyArt()`
+
+  * ###### General functions
+    * Vantage File Management Functions
+      * `list_files()` - Lists the files installed in the Database.
+
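The new file-management helper above is a plain call once a connection exists; host and credentials below are placeholders:

```python
from teradataml import create_context, list_files

create_context(host="<host>", username="<user>", password="<password>")
list_files()   # lists files installed in the database (e.g., via install_file())
```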
+  * ###### OpensourceML: LightGBM
+    * teradataml adds support for the lightgbm package through the `OpensourceML` (`OpenML`) feature.
+      The following functionality is added in the current release:
+      * `td_lightgbm` - Interface object to run lightgbm functions and classes through Teradata Vantage.
+        Example usage below:
+        ```
+        from teradataml import td_lightgbm, DataFrame
+
+        df_train = DataFrame("multi_model_classification")
+
+        feature_columns = ["col1", "col2", "col3", "col4"]
+        label_columns = ["label"]
+        part_columns = ["partition_column_1", "partition_column_2"]
+
+        df_x = df_train.select(feature_columns)
+        df_y = df_train.select(label_columns)
+
+        # Dataset creation.
+        # Single model case.
+        obj_s = td_lightgbm.Dataset(df_x, df_y, silent=True, free_raw_data=False)
+
+        # Multi model case.
+        obj_m = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
+        obj_m_v = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
+
+        ## Model training.
+        # Single model case.
+        opt = td_lightgbm.train(params={}, train_set=obj_s, num_boost_round=30)
+
+        opt.predict(data=df_x, num_iteration=20, pred_contrib=True)
+
+        # Multi model case.
+        rec = {}  # dict populated by the record_evaluation callback
+        opt = td_lightgbm.train(params={}, train_set=obj_m, num_boost_round=30,
+                                callbacks=[td_lightgbm.record_evaluation(rec)],
+                                valid_sets=[obj_m_v, obj_m_v])
+
+        # Passing `label` argument to get it returned in output DataFrame.
+        opt.predict(data=df_x, label=df_y, num_iteration=20)
+        ```
+    * Added support for accessing scikit-learn APIs using the exposed interface object `td_lightgbm`.
+
+      Refer to the Teradata Package for Python User Guide for more details of this feature: arguments, usage, examples and supportability in Vantage.
+
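Continuing the example above, a hedged sketch of the scikit-learn style interface mentioned in the last bullet, assuming the estimator classes are reachable directly on the interface object:

```python
# Reuses df_x and df_y from the previous block.
sk_model = td_lightgbm.LGBMClassifier(n_estimators=50)
sk_model = sk_model.fit(df_x, df_y)   # trains in Vantage, returns a wrapped model
sk_model.predict(df_x)                # returns a teradataml object, not a numpy array
```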
+  * ###### teradataml: Functions
+    * `register()` - Registers a user defined function (UDF).
+    * `call_udf()` - Calls a registered user defined function (UDF) and returns a ColumnExpression.
+    * `list_udfs()` - Lists all the UDFs registered using the `register()` function.
+    * `deregister()` - Deregisters a user defined function (UDF).
+
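A minimal UDF round trip built from the four functions above; the table, column, and exact argument shapes are illustrative, not the authoritative signatures:

```python
from teradataml import DataFrame, register, call_udf, list_udfs, deregister

def to_upper(value):
    # Plain Python function applied to each row's value.
    return value.upper() if value else value

register("to_upper", to_upper)                 # register the UDF by name
df = DataFrame("employees")                    # illustrative table
df = df.assign(upper_name=call_udf("to_upper", ("name",)))
list_udfs()                                    # UDFs registered via register()
deregister("to_upper")
```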
+  * ###### teradataml: Options
+    * Configuration Options
+      * `table_operator` - Specifies the name of the table operator.
+
+* ##### Updates
+  * ###### General functions
+    * `set_auth_token()` - Added the `base_url` parameter, which accepts the CCP url. `ues_url` will be deprecated in the future and users will need to specify `base_url` instead.
+
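A hedged sketch of the updated call; the URL is a placeholder and any remaining authentication arguments follow the documented signature:

```python
from teradataml import set_auth_token

# 'base_url' accepts the CCP url; 'ues_url' is on a deprecation path.
set_auth_token(base_url="https://<ccp-host>")
```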
+  * ###### teradataml: DataFrame function
+    * `join()`
+      * Now supports a compound ColumnExpression having more than one binary operator in the `on` argument.
+      * Now supports a ColumnExpression containing FunctionExpression(s) in the `on` argument.
+      * Self-join now expects an aliased DataFrame in the `other` argument.
+
+  * ###### teradataml: GeoDataFrame function
+    * `join()`
+      * Now supports a compound ColumnExpression having more than one binary operator in the `on` argument.
+      * Now supports a ColumnExpression containing FunctionExpression(s) in the `on` argument.
+      * Self-join now expects an aliased DataFrame in the `other` argument.
+
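A sketch of the updated join behavior described above, assuming a hypothetical `employees` table; note the aliased DataFrame for the self-join and the compound `on` expression:

```python
from teradataml import DataFrame

df = DataFrame("employees")        # hypothetical table
mgr = df.alias("mgr")              # self-join now expects an aliased DataFrame in 'other'
joined = df.join(other=mgr,
                 on=(df.manager_id == mgr.id) & (df.dept_id == mgr.dept_id),
                 how="inner")
```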
+  * ###### teradataml: Unbounded Array Framework (UAF) Functions
+    * `SAX()` - Default values added for `window_size` and `output_frequency`.
+    * `DickeyFuller()`
+      * Supports TDAnalyticResult as input.
+      * Default value added for `max_lags`.
+      * Removed parameter `drift_trend_formula`.
+      * Updated permitted values for `algorithm`.
+
+  * ##### teradataml: AutoML
+    * `AutoML`, `AutoRegressor` and `AutoClassifier`
+      * Now support the DECIMAL datatype as input.
+
+  * ##### teradataml: SQLE Engine Analytic Functions
+    * `TextParser()`
+      * Argument name `covert_to_lowercase` changed to `convert_to_lowercase`.
+
+* ##### Bug Fixes
+  * `db_list_tables()` now returns correct results when '%' is used.
+
 #### teradataml 20.00.00.02
 
 * teradataml will no longer be supported with SQLAlchemy < 2.0.

@@ -83,6 +264,10 @@ Copyright 2024, Teradata. All Rights Reserved.
 * Following arguments will be deprecated in the future:
     * `ues_url`
     * `auth_token`
+
+* ###### teradataml DataFrame
+    * `to_pandas()` - Function returns the pandas DataFrame with Decimal column types as float instead of object.
+      If users want the datatype to be object, set the argument `coerce_float` to False.
 
 * ###### Database Utility
     * `list_td_reserved_keywords()` - Accepts a list of strings as argument.
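The `to_pandas()` change above in short form; the table name is illustrative:

```python
from teradataml import DataFrame

df = DataFrame("orders")                      # table with DECIMAL columns
pdf = df.to_pandas()                          # DECIMAL columns arrive as float
pdf_obj = df.to_pandas(coerce_float=False)    # keep Decimal objects (dtype 'object')
```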
@@ -102,7 +287,7 @@ Copyright 2024, Teradata. All Rights Reserved.
 * ##### Bug Fixes
   * KNN `predict()` function can now predict on test data which does not contain the target column.
   * Metrics functions are supported on the Lake system.
-  * The following OpensourceML functions from different sklearn modules are fixed.
+  * The following OpensourceML functions from different sklearn modules in the single model case are fixed.
     * `sklearn.ensemble`:
       * ExtraTreesClassifier - `apply()`
       * ExtraTreesRegressor - `apply()`

@@ -115,12 +300,21 @@ Copyright 2024, Teradata. All Rights Reserved.
       * Nystroem - `transform()`, `fit_transform()`
       * PolynomialCountSketch - `transform()`, `fit_transform()`
       * RBFSampler - `transform()`, `fit_transform()`
-    * `sklearn.
+    * `sklearn.neighbors`:
       * KNeighborsTransformer - `transform()`, `fit_transform()`
       * RadiusNeighborsTransformer - `transform()`, `fit_transform()`
     * `sklearn.preprocessing`:
       * KernelCenterer - `transform()`
       * OneHotEncoder - `transform()`, `inverse_transform()`
+  * The following OpensourceML functions from different sklearn modules in the multi model case are fixed.
+    * `sklearn.feature_selection`:
+      * SelectFpr - `transform()`, `fit_transform()`, `inverse_transform()`
+      * SelectFdr - `transform()`, `fit_transform()`, `inverse_transform()`
+      * SelectFromModel - `transform()`, `fit_transform()`, `inverse_transform()`
+      * SelectFwe - `transform()`, `fit_transform()`, `inverse_transform()`
+      * RFECV - `transform()`, `fit_transform()`, `inverse_transform()`
+    * `sklearn.clustering`:
+      * Birch - `transform()`, `fit_transform()`
   * OpensourceML returns teradataml objects for model attributes and functions instead of sklearn
     objects so that the user can perform further operations like `score()`, `predict()` etc. on top
     of the returned objects.
teradataml/__init__.py
CHANGED
teradataml/_version.py
CHANGED

teradataml/analytics/analytic_function_executor.py
CHANGED

@@ -28,8 +28,8 @@ from teradataml.common.messages import Messages, MessageCodes
 from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
 from teradataml.common.utils import UtilFuncs
 from teradataml.context.context import _get_context_temp_databasename
-from teradataml.dataframe.dataframe import in_schema
-from teradataml.dbutils.dbutils import _create_table, db_drop_table
+from teradataml.dataframe.dataframe import in_schema, DataFrame
+from teradataml.dbutils.dbutils import _create_table, db_drop_table, list_td_reserved_keywords
 from teradatasqlalchemy.types import *
 from teradataml.table_operators.table_operator_query_generator import TableOperatorQueryGenerator
 from teradataml.telemetry_utils.queryband import collect_queryband
@@ -343,6 +343,17 @@ class _AnlyticFunctionExecutor:
         self._func_output_args.append(temp_table_name)
         self._function_output_table_map[lang_name] = temp_table_name
 
+    def _get_column_name_from_feature(self, obj):
+        # Extract the associated column name from Feature.
+        from teradataml.store.feature_store.feature_store import Feature
+        if isinstance(obj, Feature):
+            return obj.column_name
+
+        if isinstance(obj, list):
+            return [self._get_column_name_from_feature(col) for col in obj]
+
+        return obj
+
     def _process_other_argument(self, **kwargs):
         """
         DESCRIPTION:
@@ -439,6 +450,9 @@ class _AnlyticFunctionExecutor:
 
         self._validate_analytic_function_argument(arg_name, arg_value, argument)
 
+        # Extract column names if it is a Feature.
+        arg_value = self._get_column_name_from_feature(arg_value)
+
         # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
         # Check lower bound and upper bound for number type of arguments.
         if isinstance(arg_value, (int, float)):
@@ -474,6 +488,12 @@ class _AnlyticFunctionExecutor:
         # does not require special case handler.
         arg_value = self._spl_func_obj._add_square_bracket(arg_value)
 
+        # Handling special case for Teradata reserved keywords or column names with spaces.
+        # If argument is a string or list of strings, then add quotes to the string.
+        if arg_name not in ["partition_columns"] and (\
+                UtilFuncs._contains_space(arg_value) or list_td_reserved_keywords(arg_value)):
+            arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
+
         # SequenceInputBy arguments require special processing.
         if 500 <= argument.get_r_order_number() <= 510:
             quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
@@ -535,6 +555,17 @@ class _AnlyticFunctionExecutor:
             return repr_string
         self._dyn_cls_data_members["__repr__"] = print_result
 
+        def copy(self, **args):
+            """ Function to copy the ART to another table."""
+            from teradataml import CopyArt
+            params = {
+                "data": self.result,
+                "database_name": args.get("database_name", None),
+                "table_name": args.get("table_name", None),
+                "map_name": args.get("map_name", None),
+                "persist": args.get("persist", False)}
+            return CopyArt(**params)
+
         query = self.sqlmr_query
         build_time = None if self.__build_time is None else round(self.__build_time, 2)
@@ -544,6 +575,7 @@ class _AnlyticFunctionExecutor:
         # To list attributes using dict()
         self._dyn_cls_data_members["__dict__"] = self._dyn_cls_data_members
         self._dyn_cls_data_members["_mlresults"] = self._mlresults
+        self._dyn_cls_data_members["copy"] = copy
 
         # Dynamic class creation with In-DB function name.
         indb_class = type(self.func_name, (object,), self._dyn_cls_data_members)
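Taken together, the two hunks above attach a `copy` method to every dynamically created result class, so an ART result can be cloned roughly like this (argument names follow the `params` dict in the diff; values are placeholders):

```python
# 'res' stands for the result object returned by a UAF/analytic function call.
copied = res.copy(database_name="mydb", table_name="art_copy", persist=True)
```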
@@ -700,6 +732,14 @@ class _AnlyticFunctionExecutor:
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH, "persist", "volatile"),
                 MessageCodes.CANNOT_USE_TOGETHER_WITH)
+
+        # If function is VectorDistance and largereference_input is set to True,
+        # then set data_partition_column to PartitionKind.DIMENSION and
+        # reference_data_partition_column to PartitionKind.ANY .
+        if self.func_name == "VectorDistance" and \
+                kwargs.get("largereference_input", False):
+            kwargs['target_data_partition_column'] = PartitionKind.DIMENSION
+            kwargs['reference_data_partition_column'] = PartitionKind.ANY
 
         self._dyn_cls_data_members.update(kwargs)
@@ -721,6 +761,11 @@ class _AnlyticFunctionExecutor:
         if self.func_name in ['GLM', 'TDGLMPredict'] and \
                 any(key in kwargs for key in ['data_partition_column', 'data_hash_column', 'local_order_data']):
             skip_output_arg_processing = True
+        elif self.func_name in ['CopyArt']:
+            # CopyArt function take care of persisting the result table internally
+            # through 'permanent_table' argument.
+            persist = False
+            volatile = False
 
         if not skip_output_arg_processing:
             self._process_output_argument(**kwargs)
@@ -2180,6 +2225,31 @@ class _StoredProcedureExecutor(_UAFFunctionExecutor):
         self._func_other_args['database_name'] = UtilFuncs._teradata_quote_arg(schema_name, "\'", False)
         self._func_other_args['table_name'] = UtilFuncs._teradata_quote_arg(table_name, "\'", False)
 
+        # 'CopyArt' function requires 'SRC_DATABASENMAE' and 'SRC_TABLENAME' as input arguments.
+        # Extract the database and table name from the 'data' argument and add them to the
+        # '_func_other_args' dictionary.
+        if self.func_name == "CopyArt":
+            data = kwargs.get('data', None)
+            argument_info = ["data", data, False, (DataFrame), True]
+            # 'data' is a required argument for 'CopyArt' function to get the source table name and database name.
+            _Validators._validate_missing_required_arguments([argument_info])
+            # 'data' should be a DataFrame.
+            _Validators._validate_function_arguments([argument_info])
+
+            # Add the 'SRC_DATABASENMAE' and 'SRC_TABLENAME' to the '_func_other_args' dictionary.
+            self._func_other_args["SRC_DATABASENMAE"] = "'{0}'".format(UtilFuncs._extract_db_name(data._table_name))
+            self._func_other_args["SRC_TABLENAME"] = "'{0}'".format(UtilFuncs._extract_table_name(data._table_name))
+
+            # Setting permanent_table to True if 'persist' is set to True, else False.
+            kwargs['permanent_table'] = 'True' if kwargs.get('persist', False) else 'False'
+
+            # Setting 'map_name' to empty string if not provided.
+            if kwargs.get('map_name', None) is None:
+                kwargs['map_name'] = ""
+
+            # CopyArt does not take 'data' as input argument.
+            kwargs.pop('data')
+
         for argument in self._metadata.arguments:
             sql_name = argument.get_name()
             lang_name = argument.get_lang_name()
@@ -2236,8 +2306,13 @@ class _StoredProcedureExecutor(_UAFFunctionExecutor):
         Internal function to process the function output.
         """
         for lang_name, table_name in self._function_output_table_map.items():
-            out_table_name = UtilFuncs._extract_table_name(table_name)
-            out_db_name = UtilFuncs._extract_db_name(table_name)
+            # For 'CopyArt' function, the result should be the destination table name and database name provided as input.
+            if self.func_name == "CopyArt":
+                out_table_name = kwargs.get('table_name')
+                out_db_name = kwargs.get('database_name')
+            else:
+                out_table_name = UtilFuncs._extract_table_name(table_name)
+                out_db_name = UtilFuncs._extract_db_name(table_name)
             df = self._awu._create_data_set_object(
                 df_input=out_table_name, database_name=out_db_name, source_type="table")
             self._dyn_cls_data_members[lang_name] = df

teradataml/analytics/json_parser/metadata.py
CHANGED

@@ -1179,10 +1179,19 @@ class _AnlyFuncMetadata:
             # from teradataml.data.docs.<function_type>.<doc_dir_with_version_info>.<func_name>
             # import <func_name>
             func_module = __import__(("teradataml.data.docs.{}.{}.{}".
-
-
-            return getattr(func_module, self.func_name).__doc__
+                                      format(function_type, doc_dir, self.func_name)),
+                                     fromlist=[self.func_name])
+            return getattr(func_module, self.func_name).__doc__
         except:
+            # For db_version 20.00, if function type is sqle, then check for docs_17_20 directory.
+            if version_dir == '20.00' and function_type == 'sqle':
+                try:
+                    func_module = __import__(("teradataml.data.docs.{}.{}.{}".
+                                              format(function_type, "docs_17_20", self.func_name)),
+                                             fromlist=[self.func_name])
+                    return getattr(func_module, self.func_name).__doc__
+                except:
+                    pass
         return ("Refer to Teradata Package for Python Function Reference guide for "
                 "Documentation. Reference guide can be found at: https://docs.teradata.com ."
                 "Refer to the section with Database version: {}".format(self.__database_version))

teradataml/analytics/json_parser/utils.py
CHANGED

@@ -54,7 +54,6 @@ def _get_json_data_from_tdml_repo():
     # both versions are matched, then the json store has data available so no need
     # to parse again.
     if configure.database_version != _JsonStore.version:
-
         # Json store version is different from database version. So, json's should
         # be parsed again. Before parsing the json, first clean the json store.
         _JsonStore.clean()

@@ -171,9 +170,15 @@ def __get_json_files_directory():
     if func_info.value["lowest_version"]:
         # Check if current function type is allowed on connected Vantage version or not.
         if func_info.value["func_type"] in func_type_json_version.keys():
+            # If function type is SQLE and db_version is 20.00, then add 17.20 JSON directory.
+            if func_type_json_version[func_info.value["func_type"]] == '20.00' and \
+                    func_info.value["func_type"] == 'sqle':
+                yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info,
+                                                     version='17.20'),
+                       func_info.name]
             yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info,
                                                  version=func_type_json_version[func_info.value["func_type"]]),
-                   func_info.name]
+                   func_info.name]
         else:
             yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info), func_info.name]

teradataml/analytics/table_operator/__init__.py
CHANGED

@@ -1,7 +1,7 @@
 from teradataml.analytics.meta_class import _AnalyticFunction
 from teradataml.analytics.meta_class import _common_init, _common_dir
 
-_nos_functions = ['ReadNOS', 'WriteNOS']
+_nos_functions = ['ReadNOS', 'WriteNOS', 'Image2Matrix']
 
 for func in _nos_functions:
     globals()[func] = type("{}".format(func), (_AnalyticFunction,),

teradataml/analytics/uaf/__init__.py
CHANGED

@@ -73,7 +73,7 @@ for func in _uaf_functions:
                            "__doc__": _AnalyticFunction.__doc__,
                            "__dir__": _common_dir})
 
-_stored_procedure = ['FilterFactory1d']
+_stored_procedure = ['CopyArt', 'FilterFactory1d']
 
 for func in _stored_procedure:
     globals()[func] = type("{}".format(func), (_AnalyticFunction,),
teradataml/analytics/utils.py
CHANGED

@@ -441,6 +441,10 @@ class FuncSpecialCaseHandler():
                     "filter_type": self._single_quote_arg,
                     "window_type": self._single_quote_arg,
                     "filter_description": self._single_quote_arg},
+        "CopyArt": {"database_name": self._single_quote_arg,
+                    "table_name": self._single_quote_arg,
+                    "map_name": self._single_quote_arg,
+                    "permanent_table": self._single_quote_arg},
         "DWT": {"wavelet": self._single_quote_arg},
         "IDWT": {"part": self._single_quote_arg,
                  "wavelet": self._single_quote_arg,

teradataml/automl/data_preparation.py
CHANGED

@@ -465,7 +465,7 @@ class _DataPreparation:
         RETURNS:
             int, number of folds to be used for cross-validation.
         """
-        num_of_folds = lambda rows:
+        num_of_folds = lambda rows: 2 if rows > 20000 else (4 if 1000 < rows <= 20000 else 10)
         return num_of_folds(rows)
 
     def _feature_selection_PCA(self):
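The restored lambda picks the number of cross-validation folds from the row count; a quick check of its three branches:

```python
num_of_folds = lambda rows: 2 if rows > 20000 else (4 if 1000 < rows <= 20000 else 10)

assert num_of_folds(500) == 10      # small data: more folds
assert num_of_folds(5000) == 4      # mid-sized data
assert num_of_folds(50000) == 2     # large data: fewer, cheaper folds
```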
@@ -783,7 +783,8 @@ class _DataPreparation:
         for col in data.columns:
             # Selecting columns that will be scaled
             # Exculding target_col and columns with single value
-            if col not in ['id', self.target_column] and
+            if col not in ['id', self.target_column] and \
+                    data.drop_duplicate(col).size > 1:
                 columns_to_scale.append(col)
 
         if feature_selection_mtd == "lasso":

teradataml/automl/feature_engineering.py
CHANGED

@@ -40,6 +40,7 @@ from teradataml.common.garbagecollector import GarbageCollector
 from teradataml.dataframe.sql_functions import case
 from teradataml.hyperparameter_tuner.utils import _ProgressBar
 from teradataml.utils.validators import _Validators
+from teradataml.common.utils import UtilFuncs
 
 
 class _FeatureEngineering:

@@ -273,7 +274,7 @@ class _FeatureEngineering:
                           show_data=True)
         start_time = time.time()
         rows = self.data.shape[0]
-        self.data=self.data.drop_duplicate()
+        self.data=self.data.drop_duplicate(self.data.columns)
         if rows != self.data.shape[0]:
             self._display_msg(msg=f'Updated dataset sample after removing {rows-self.data.shape[0]} duplicate records:',
                               data=self.data,

@@ -565,11 +566,18 @@ class _FeatureEngineering:
 
         # Removing rows with missing target column value
         self.data = self.data.dropna(subset=[self.target_column])
+
+        params = {
+            "data": self.data,
+            "target_columns": self.data.columns,
+            "persist": True,
+            "display_table_name": False
+        }
 
-        obj = ColumnSummary(
-
-
-
+        obj = ColumnSummary(**params)
+
+        # Adding transformed data containing table to garbage collector
+        GarbageCollector._add_to_garbagecollector(obj.result._table_name)
 
         cols_miss_val={}
         # Iterating over each row in the column summary result

@@ -704,7 +712,7 @@ class _FeatureEngineering:
         for key, val in self.imputation_cols.items():
 
             col_stat.append(key)
-            if self.data_types[key] in ['float', 'int']:
+            if self.data_types[key] in ['float', 'int', 'decimal.Decimal']:
                 val = skew_data[f'skew_{key}']
                 # Median imputation method, if abs(skewness value) > 1
                 if abs(val) > 1:

@@ -713,7 +721,7 @@ class _FeatureEngineering:
             else:
                 stat.append('mean')
             # Mode imputation method, if categorical column
-
+            elif self.data_types[key] in ['str']:
                 stat.append('mode')
 
         self._display_msg(msg="Columns with their imputation method:",