PyPI - teradataml - Versions diffs - 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl - Mend

teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of teradataml might be problematic. Click here for more details.

Files changed (126) hide show

teradataml/LICENSE-3RD-PARTY.pdf +0 -0
teradataml/README.md +315 -2
teradataml/__init__.py +4 -0
teradataml/_version.py +1 -1
teradataml/analytics/analytic_function_executor.py +95 -8
teradataml/analytics/byom/__init__.py +1 -1
teradataml/analytics/json_parser/metadata.py +12 -3
teradataml/analytics/json_parser/utils.py +7 -2
teradataml/analytics/sqle/__init__.py +5 -1
teradataml/analytics/table_operator/__init__.py +1 -1
teradataml/analytics/uaf/__init__.py +1 -1
teradataml/analytics/utils.py +4 -0
teradataml/analytics/valib.py +18 -4
teradataml/automl/__init__.py +51 -6
teradataml/automl/data_preparation.py +59 -35
teradataml/automl/data_transformation.py +58 -33
teradataml/automl/feature_engineering.py +27 -12
teradataml/automl/model_training.py +73 -46
teradataml/common/constants.py +88 -29
teradataml/common/garbagecollector.py +2 -1
teradataml/common/messagecodes.py +19 -3
teradataml/common/messages.py +6 -1
teradataml/common/sqlbundle.py +64 -12
teradataml/common/utils.py +246 -47
teradataml/common/warnings.py +11 -0
teradataml/context/context.py +161 -27
teradataml/data/amazon_reviews_25.csv +26 -0
teradataml/data/byom_example.json +11 -0
teradataml/data/dataframe_example.json +18 -2
teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
teradataml/data/hnsw_alter_data.csv +5 -0
teradataml/data/hnsw_data.csv +10 -0
teradataml/data/jsons/byom/h2opredict.json +1 -1
teradataml/data/jsons/byom/onnxembeddings.json +266 -0
teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
teradataml/data/medical_readings.csv +101 -0
teradataml/data/patient_profile.csv +101 -0
teradataml/data/scripts/lightgbm/dataset.template +157 -0
teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
teradataml/data/target_udt_data.csv +8 -0
teradataml/data/templates/open_source_ml.json +3 -2
teradataml/data/teradataml_example.json +8 -0
teradataml/data/vectordistance_example.json +4 -0
teradataml/dataframe/copy_to.py +8 -3
teradataml/dataframe/data_transfer.py +11 -1
teradataml/dataframe/dataframe.py +1049 -285
teradataml/dataframe/dataframe_utils.py +152 -20
teradataml/dataframe/functions.py +578 -35
teradataml/dataframe/setop.py +11 -6
teradataml/dataframe/sql.py +185 -16
teradataml/dbutils/dbutils.py +1049 -115
teradataml/dbutils/filemgr.py +48 -1
teradataml/hyperparameter_tuner/optimizer.py +12 -1
teradataml/lib/aed_0_1.dll +0 -0
teradataml/opensource/__init__.py +1 -1
teradataml/opensource/_base.py +1466 -0
teradataml/opensource/_class.py +464 -0
teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
teradataml/opensource/_lightgbm.py +949 -0
teradataml/opensource/_sklearn.py +1008 -0
teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
teradataml/options/__init__.py +54 -38
teradataml/options/configure.py +131 -27
teradataml/options/display.py +13 -2
teradataml/plot/axis.py +47 -8
teradataml/plot/figure.py +33 -0
teradataml/plot/plot.py +63 -13
teradataml/scriptmgmt/UserEnv.py +5 -5
teradataml/scriptmgmt/lls_utils.py +130 -40
teradataml/store/__init__.py +12 -0
teradataml/store/feature_store/__init__.py +0 -0
teradataml/store/feature_store/constants.py +291 -0
teradataml/store/feature_store/feature_store.py +2318 -0
teradataml/store/feature_store/models.py +1505 -0
teradataml/table_operators/Apply.py +32 -18
teradataml/table_operators/Script.py +3 -1
teradataml/table_operators/TableOperator.py +3 -1
teradataml/table_operators/query_generator.py +3 -0
teradataml/table_operators/table_operator_query_generator.py +3 -1
teradataml/table_operators/table_operator_util.py +37 -38
teradataml/table_operators/templates/dataframe_register.template +69 -0
teradataml/utils/dtypes.py +51 -2
teradataml/utils/internal_buffer.py +18 -0
teradataml/utils/validators.py +99 -8
{teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
{teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
teradataml/libaed_0_1.dylib +0 -0
teradataml/libaed_0_1.so +0 -0
teradataml/opensource/sklearn/__init__.py +0 -1
teradataml/opensource/sklearn/_class.py +0 -255
teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
{teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
{teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
{teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0

teradataml/dataframe/setop.py CHANGED Viewed

@@ -149,7 +149,7 @@ def __check_concat_compatibility(df_list, join, sort, ignore_index):
     # Iterate on all DFs to be applied for set operation.
     for df in dfs_to_operate_on:
         # Process each column in the DF of the iteration.
-        for c in df._metaexpr.t.c:
+        for c in df._metaexpr.c:
             col_name = c.name
             # Process the column name if it is not already processed.
             # Processing of set operation is column name based so if the DF in the nth iteration had column 'xyz',
@@ -193,6 +193,8 @@ def __check_concat_compatibility(df_list, join, sort, ignore_index):
                         col_dict[col_name]['col_present'] = col_present_in_dfs
                         # The type to be used for the column is the one of the first DF it is present in.
                         col_dict[col_name]['col_type'] = col_types_in_dfs[0]
+                        # Column name stored with quotes if required.
+                        col_dict[col_name]['name'] = c.compile()
                         # If the type of the column in all DFs is not the same, then the operation is not lazy.
                         if not all(ctype == col_dict[col_name]['col_type']
@@ -217,6 +219,8 @@ def __check_concat_compatibility(df_list, join, sort, ignore_index):
                         col_dict[col_name]['col_present'] = col_present_in_dfs
                         # The type to be used for the column is the one of the first DF it is present in.
                         col_dict[col_name]['col_type'] = non_none_type_to_add
+                        # Column name stored with quotes if required.
+                        col_dict[col_name]['name'] = c.compile()
                         # If the type of the column in all DFs is not the same, then the operation is not lazy.
                         if not all(True if ctype is None else ctype == non_none_type_to_add
@@ -667,15 +671,16 @@ def concat(df_list, join='OUTER', allow_duplicates=True, sort=False, ignore_inde
         # Now create the list of columns for each DataFrame to concatenate
         type_compiler = td_type_compiler(td_dialect)
         for col_name, value in master_columns_dict.items():
             for i in range(len(col_list)):
+                # Quoting is already done for column names if column name starts with number or it is reserved keywords.
+                # Here checking again if it is teradata keyword or not for quotes.
+                column_name = UtilFuncs._process_for_teradata_keyword(value['name'])
                 if not value['col_present'][i]:
-                    col_list[i].append('CAST(NULL as {}) as {}'.format(type_compiler.process(value['col_type']),
-                                                                       UtilFuncs._teradata_quote_arg(col_name, "\"",
-                                                                                                     False)))
+                    col_list[i].append('CAST(NULL as {}) as {}'.format(type_compiler.process(value['col_type']), column_name))
                 else:
-                    col_name = UtilFuncs._process_for_teradata_keyword(col_name)
-                    col_list[i].append(col_name)
+                    col_list[i].append(column_name)
         input_table_columns = []
         for i in range(len(col_list)):

teradataml/dataframe/sql.py CHANGED Viewed

@@ -222,6 +222,10 @@ class _MetaExpression(object):
     def __repr__(self):
       return repr(self.__t)
+    def _get_table_expr(self):
+        return self.__t
 class _PandasTableExpression(TableExpression):
     def _assign(self, drop_columns, **kw):
@@ -261,7 +265,7 @@ class _PandasTableExpression(TableExpression):
         existing = [(c.name, c) for c in self.c]
         new = [(label, expression) for label, expression in kw.items() if label not in current]
-        new = sorted(new, key = lambda x: x[0])
+        new = sorted(new, key=lambda x: x[0])
         for alias, expression in existing + new:
             if drop_columns and alias not in kw:
@@ -484,6 +488,7 @@ class _SQLTableExpression(_PandasTableExpression):
             columns = []
             for c in kw['column_order']:
                 name = c.strip()
+                # Get case-insensitive column names from Table object.
                 col = table.c.get(name, table.c.get(name.lower(), table.c.get(name.upper())))
                 if col is None:
@@ -5473,7 +5478,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
         self._env_name = kw.get("env_name", None)
         self._delimiter = kw.get("delimiter", None)
         self._quotechar = kw.get("quotechar", None)
-        self.alias_name = self.compile() if self._udf is None else None
+        self._udf_script = kw.get("udf_script", None)
+        self.alias_name = self.compile() if (self._udf or self._udf_script) is None else None
     @property
     def expression(self):
@@ -5653,23 +5659,23 @@ class _SQLColumnExpression(_LogicalColumnExpression,
         """
         Calls the compile method of the underlying sqlalchemy.Column
         """
-        if len(kw) == 0:
-            kw = dict({'dialect': td_dialect(),
-                'compile_kwargs':
-                    {
-                        'include_table': False,
-                        'literal_binds': True
-                    }
-                })
-        return str(self.expression.compile(*args, **kw))
+        kw_new = dict({'dialect': td_dialect(),
+                       'compile_kwargs':
+                           {
+                                'include_table': False,
+                                'literal_binds': True
+                           }
+                       })
+        if len(kw) != 0:
+            kw_new.update(kw)
+        return str(self.expression.compile(*args, **kw_new))
     def compile_label(self, label):
         """
         DESCRIPTION:
             Compiles expression with label, by calling underlying sqlalchemy methods.
-        PARAMETES:
+        PARAMETERS:
             label:
                 Required Argument.
                 Specifies the label to be used to alias the compiled expression.
@@ -5699,7 +5705,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             with the "value". Use this function either to replace or remove
             NA from Column.
-        PARAMETES:
+        PARAMETERS:
             value:
                 Required Argument.
                 Specifies the replacement value for null values in the column.
@@ -6186,12 +6192,19 @@ class _SQLColumnExpression(_LogicalColumnExpression,
                     # If user has not passed any type, then set it to
                     # NullType().
                     type = sqlalc.sql.sqltypes.NullType()
+                # Boolean flag to treat function as an instance method.
+                function_has_col_caller = column_function
                 # Generate the function syntax based on whether the
                 # function is column function or not.
                 if column_function:
                     name = quoted_name("{}.{}".format(col_name, func_name),
                                        False)
+                    # Dynamic function gets called on teradataml._SQLColumnExpression type object.
+                    # 'expression' attribute of _SQLColumnExpression object holds
+                    # corresponding SQLAlchemy.Expression type object.
+                    # SQLAlchemy.Expression type object should be available from FunctionElement.
+                    # This 'func_caller' attribute points to that Expression object.
+                    func_caller = self.expression
                 else:
                     name = quoted_name(func_name, False)
@@ -10809,4 +10822,160 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             whens = case([((self != 0) & (base != 0) & (base.ln() != 0),
                            (self.ln() / base.ln()).cast(FLOAT))])
-        return whens
+        return whens
+    def isnan(self):
+        """
+        DESCRIPTION:
+            Function evaluates a variable or expression to determine if the
+            floating-point argument is a NaN (Not-a-Number) value. When a database
+            table contains a NaN value, the data is undefined and unrepresentable
+            in floating-point arithmetic. For example, division by 0, or the square root
+            of a negative number would return a NaN result.
+        RETURNS:
+            ColumnExpression.
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml","titanic")
+            # Create a DataFrame on 'titanic' table.
+            >>> titanic = DataFrame.from_table('titanic')
+            >>> df = titanic.select(["passenger", "age", "fare"])
+            >>> print(df)
+                        age      fare
+            passenger
+            326        36.0  135.6333
+            183         9.0   31.3875
+            652        18.0   23.0000
+            40         14.0   11.2417
+            774         NaN    7.2250
+            366        30.0    7.2500
+            509        28.0   22.5250
+            795        25.0    7.8958
+            61         22.0    7.2292
+            469         NaN    7.7250
+            >>>
+            # Example 1: Find whether 'fare' column contains NaN values or not.
+            >>> nan_df = df.assign(nanornot = df.fare.isnan())
+            >>> print(nan_df)
+                        age      fare nanornot
+            passenger
+            326        36.0  135.6333        0
+            183         9.0   31.3875        0
+            652        18.0   23.0000        0
+            40         14.0   11.2417        0
+            774         NaN    7.2250        0
+            366        30.0    7.2500        0
+            509        28.0   22.5250        0
+            795        25.0    7.8958        0
+            61         22.0    7.2292        0
+            469         NaN    7.7250        0
+            >>>
+        """
+        return _SQLColumnExpression(literal_column(f"TD_ISNAN({self.compile()})"), type=INTEGER)
+    def isinf(self):
+        """
+        DESCRIPTION:
+            Function evaluates a variable or expression to determine if the
+            floating-point argument is an infinite number. This function determines
+            if a database table contains positive or negative infinite values.
+        RETURNS:
+            ColumnExpression.
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml","titanic")
+            # Create a DataFrame on 'titanic' table.
+            >>> titanic = DataFrame.from_table('titanic')
+            >>> df = titanic.select(["passenger", "age", "fare"])
+            >>> print(df)
+                        age      fare
+            passenger
+            326        36.0  135.6333
+            183         9.0   31.3875
+            652        18.0   23.0000
+            40         14.0   11.2417
+            774         NaN    7.2250
+            366        30.0    7.2500
+            509        28.0   22.5250
+            795        25.0    7.8958
+            61         22.0    7.2292
+            469         NaN    7.7250
+            >>>
+            # Example 1: Find whether 'fare' column contains infinity values or not.
+            >>> inf_df = df.assign(infornot = df.fare.isinf())
+            >>> print(inf_df)
+                        age      fare infornot
+            passenger
+            326        36.0  135.6333        0
+            183         9.0   31.3875        0
+            652        18.0   23.0000        0
+            40         14.0   11.2417        0
+            774         NaN    7.2250        0
+            366        30.0    7.2500        0
+            509        28.0   22.5250        0
+            795        25.0    7.8958        0
+            61         22.0    7.2292        0
+            469         NaN    7.7250        0
+            >>>
+        """
+        return _SQLColumnExpression(literal_column(f"TD_ISINF({self.compile()})"), type=INTEGER)
+    def isfinite(self):
+        """
+        DESCRIPTION:
+            Function evaluates a variable or expression to determine if
+            it is a finite floating value. A finite floating value is not
+            a NaN (Not a Number) value and is not an infinity value.
+        RETURNS:
+            ColumnExpression.
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml","titanic")
+            # Create a DataFrame on 'titanic' table.
+            >>> titanic = DataFrame.from_table('titanic')
+            >>> df = titanic.select(["passenger", "age", "fare"])
+            >>> print(df)
+                        age      fare
+            passenger
+            326        36.0  135.6333
+            183         9.0   31.3875
+            652        18.0   23.0000
+            40         14.0   11.2417
+            774         NaN    7.2250
+            366        30.0    7.2500
+            509        28.0   22.5250
+            795        25.0    7.8958
+            61         22.0    7.2292
+            469         NaN    7.7250
+            >>>
+            # Example 1: Find whether 'fare' column contains finite values or not.
+            >>> finite_df = df.assign(finiteornot = df.fare.isfinite())
+            >>> print(finite_df)
+                        age    fare finiteornot
+            passenger
+            530        23.0  11.500           1
+            591        35.0   7.125           1
+            387         1.0  46.900           1
+            856        18.0   9.350           1
+            244        22.0   7.125           1
+            713        48.0  52.000           1
+            448        34.0  26.550           1
+            122         NaN   8.050           1
+            734        23.0  13.000           1
+            265         NaN   7.750           1
+            >>>
+        """
+        return _SQLColumnExpression(literal_column(f"TD_ISFINITE({self.compile()})"), type=INTEGER)

teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

Potentially problematic release.

teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl