teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Note: this release of teradataml has been flagged as potentially problematic.
Files changed (88)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/automl/model_training.py

@@ -28,6 +28,7 @@ from teradataml.dataframe.dataframe import DataFrame
 from teradataml import execute_sql, get_connection
 from teradataml import SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
 from teradataml.utils.validators import _Validators
+from teradataml.common.utils import UtilFuncs


 class _ModelTraining:
@@ -796,7 +797,8 @@ class _ModelTraining:
         trained_models = []
         for param in model_params:
             result = self._hyperparameter_tunning(param, trainng_datas)
-            trained_models.append(result)
+            if result is not None:
+                trained_models.append(result)

         models_df = pd.concat(trained_models, ignore_index=True)
         return models_df
@@ -886,39 +888,43 @@ class _ModelTraining:
         # Getting all passed models
         model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
                                             on='MODEL_ID', how='inner')
-        # Creating mapping data ID to feature selection method
-        data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
-                                "DF_1": ('rfe', train_data[1]._table_name),
-                                "DF_2": ('pca', train_data[2]._table_name)}
-
-        # Updating model stats with feature selection method and result table
-        for index, row in model_info.iterrows():
-            model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
-            model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
-            model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
-            model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
-
-        # Dropping column 'DATA_ID'
-        model_info.drop(['DATA_ID'], axis=1, inplace=True)
-
-        model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
-
-        if not self.is_classification_type():
-            # Calculating Adjusted-R2 for regression
-            # Getting size and feature count for each feature selection method
-            methods = ["lasso", "rfe", "pca"]
-            size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
-            feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
-            model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
-                1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
-                (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
-
-        self._display_msg(msg="-"*100,
-                          progress_bar=self.progress_bar,
-                          show_data=True)
-        self.progress_bar.update()
+        if not model_info.empty:
+            # Creating mapping data ID to feature selection method
+            data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
+                                    "DF_1": ('rfe', train_data[1]._table_name),
+                                    "DF_2": ('pca', train_data[2]._table_name)}
+
+            # Updating model stats with feature selection method and result table
+            for index, row in model_info.iterrows():
+                model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
+                model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
+                model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
+                model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
+
+            # Dropping column 'DATA_ID'
+            model_info.drop(['DATA_ID'], axis=1, inplace=True)

-        return model_info
+            model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
+
+            if not self.is_classification_type():
+                # Calculating Adjusted-R2 for regression
+                # Getting size and feature count for each feature selection method
+                methods = ["lasso", "rfe", "pca"]
+                size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
+                feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
+                model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
+                    1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
+                    (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
+
+            self._display_msg(msg="-"*100,
+                              progress_bar=self.progress_bar,
+                              show_data=True)
+            self.progress_bar.update()
+
+            return model_info
+
+        # Returning None, if no model is passed
+        return None

     @staticmethod
     def _eval_params_generation(ml_name,
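For reference, the ADJUSTED_R2 column introduced above applies the standard adjustment 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the row count of a method's training DataFrame (size_map) and p its feature count (feature_count_map, i.e. len(df.columns) - 2 after excluding the id and target columns). A minimal standalone sketch of the same arithmetic (function name hypothetical):

def adjusted_r2(r2, n_rows, n_features):
    # Same arithmetic as the lambda above: penalizes R2 as the number of
    # predictors grows relative to the number of rows.
    return 1 - ((1 - r2) * (n_rows - 1) / (n_rows - n_features - 1))

print(adjusted_r2(0.90, 100, 10))  # 0.8887..., slightly below the raw R2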
teradataml/common/__init__.py

@@ -1 +1,2 @@
-from teradataml.common.formula import as_categorical
+from teradataml.common.formula import as_categorical
+from teradataml.common.constants import Action, Permission
teradataml/common/constants.py

@@ -18,6 +18,14 @@ from teradataml.options.configure import configure
 from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER, VARCHAR)
 from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
 from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
+from teradatasqlalchemy import (CHAR, CLOB)
+from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
+from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
+                                INTERVAL_DAY,INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
+                                INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
+                                INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
+                                INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND,
+                                INTERVAL_SECOND)
 from teradatasqlalchemy import (GEOMETRY, MBR, MBB)


@@ -53,6 +61,7 @@ class SQLConstants(Enum):
     SQL_DELETE_ALL_ROWS = 29
     SQL_DELETE_SPECIFIC_ROW = 30
     SQL_EXEC_STORED_PROCEDURE = 31
+    SQL_SELECT_COLUMNNAMES_WITH_WHERE = 32
     CONSTRAINT = ["check_constraint", "primary_key_constraint",
                   "foreign_key_constraint", "unique_key_constraint"]

@@ -123,6 +132,14 @@ class TeradataTypes(Enum):
     TD_DATE_TYPES = [DATE, sqlalchemy.sql.sqltypes.Date]
     TD_DATE_CODES = ["DA"]
     TD_NULL_TYPE = "NULLTYPE"
+    TD_ALL_TYPES = (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT, NUMBER,
+                    TIMESTAMP, DATE, TIME, CHAR, VARCHAR, CLOB, BYTE, VARBYTE,
+                    BLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP,
+                    INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
+                    INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
+                    INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
+                    INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
+                    INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND)


 class TeradataTableKindConstants(Enum):
@@ -427,6 +444,8 @@ class TableOperatorConstants(Enum):
     APPLY_TEMPLATE = "dataframe_apply.template"
     # Template of the intermediate script that will be generated for UDF.
     UDF_TEMPLATE = "dataframe_udf.template"
+    # Template of the intermediate script that will be generated for register.
+    REGISTER_TEMPLATE = "dataframe_register.template"
     # In-DB execution mode.
     INDB_EXEC = "IN-DB"
     # Local execution mode.
@@ -443,6 +462,8 @@ class TableOperatorConstants(Enum):
     APPLY_OP = "apply"
     # udf operation.
     UDF_OP = "udf"
+    # register operation.
+    REGISTER_OP = "register"
     # Template of the script_executor that will be used to generate the temporary script_executor file.
     SCRIPT_TEMPLATE = "script_executor.template"
     # Log Type.
@@ -480,6 +501,9 @@ class TableOperatorConstants(Enum):
                         "delimiter(' ') " \
                         "returns('package VARCHAR({2}), " \
                         "version VARCHAR({2})'))"
+
+    SCRIPT_LIST_FILES_QUERY = "SELECT DISTINCT * FROM SCRIPT (SCRIPT_COMMAND " \
+                              "('ls ./{}') RETURNS ('Files VARCHAR({})'))"

 class ValibConstants(Enum):
     # A dictionary that maps teradataml name of the exposed VALIB function name
@@ -1473,3 +1497,14 @@ class SessionParamsPythonNames:
     DATABASE = "Current DataBase"
     DATEFORM = 'Current DateForm'

+
+class Action(Enum):
+    # Holds variable names for the type of grant to be provided.
+    GRANT = "GRANT"
+    REVOKE = "REVOKE"
+
+class Permission(Enum):
+    # Holds variable names for the type of permission to be provided.
+    READ = "READ"
+    WRITE = "WRITE"
+
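Action and Permission are re-exported from teradataml.common.constants in the common/__init__.py hunk above. A minimal sketch of how such flags could be consumed when assembling a GRANT/REVOKE statement; the helper and the READ/WRITE-to-privilege mapping are hypothetical, not teradataml's actual implementation:

from enum import Enum

class Action(Enum):
    GRANT = "GRANT"
    REVOKE = "REVOKE"

class Permission(Enum):
    READ = "READ"
    WRITE = "WRITE"

def privilege_sql(action, permission, table, user):
    # Hypothetical mapping of READ/WRITE onto SQL privileges.
    privs = {"READ": "SELECT", "WRITE": "INSERT, UPDATE, DELETE"}[permission.value]
    keyword = "TO" if action is Action.GRANT else "FROM"
    return "{} {} ON {} {} {}".format(action.value, privs, table, keyword, user)

print(privilege_sql(Action.GRANT, Permission.READ, "sales_db.orders", "alice"))
# GRANT SELECT ON sales_db.orders TO alice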
teradataml/common/garbagecollector.py

@@ -520,7 +520,8 @@ class GarbageCollector():
                 fileparts = file.split(GarbageCollector.__filenameseperator)
                 hostname = fileparts[1]
                 filepid = int(fileparts[2])
-                if hostname == tdmlctx.context._get_host_ip():
+                # Check for both host ip and hostname in case user passed hostname for creating connection.
+                if hostname == tdmlctx.context._get_host_ip() or hostname == tdmlctx.context._get_host():
                     if filepid == os.getpid() or not psutil.pid_exists(filepid):
                         tempfiles.append(filepath)
             except (IndexError, ValueError):
teradataml/common/messagecodes.py

@@ -86,6 +86,7 @@ class ErrorInfoCodes(Enum):
     LIST_DB_TABLES_FAILED = 'TDML_2053'
     INVALID_CONTEXT_CONNECTION = 'TDML_2054'
     TDMLDF_REQUIRED_TABLE_ALIAS = "TDML_2055"
+    TDMLDF_ALIAS_REQUIRED = TDMLDF_REQUIRED_TABLE_ALIAS
     TDMLDF_COLUMN_ALREADY_EXISTS = "TDML_2056"
     TDMLDF_AGGREGATE_INVALID_COLUMN = 'TDML_2057'
     TDMLDF_AGGREGATE_COMBINED_ERR = 'TDML_2058'
@@ -221,6 +222,9 @@ class ErrorInfoCodes(Enum):
     INVALID_PARTITIONING_COLS = 'TDML_2540'
     TARGET_COL_NOT_FOUND_FOR_EVALUATE = 'TDML_2541'

+    # OpenAF Error codes starting from 2551 - Reserved till 2560.
+    AUTH_TOKEN_REQUIRED = 'TDML_2551'
+
 class MessageCodes(Enum):
     """
     MessageCodes contains all the messages that are displayed to the user which are informational
@@ -274,6 +278,7 @@ class MessageCodes(Enum):
                          "and df.select([['col1', 'col2', 'col3']])."
     TDMLDF_INVALID_TABLE_ALIAS = "{} should not be equal."
     TDMLDF_REQUIRED_TABLE_ALIAS = "All arguments lsuffix, rsuffix, lprefix and rprefix should not be None as TeradataML DataFrames contains common column(s)."
+    TDMLDF_ALIAS_REQUIRED = "Use aliased DataFrames for self {}."
     TDMLDF_COLUMN_ALREADY_EXISTS = "Column name with alias '{}' already exists in {} TeradataML DataFrame, change '{}'"
     TDMLDF_INVALID_JOIN_CONDITION = "Invalid 'on' condition(s): '{}', check documentation for valid conditions."
     TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS = "Number of columns in '{}' and '{}' should be equal."
@@ -419,9 +424,10 @@ class MessageCodes(Enum):
                                 "they should be present in '{}' DataFrame."
     PARTITIONING_COLS_IN_FEATURE_COLS = "Columns in '{}' argument should not be part of"\
                                         " feature columns."
-    PARTITION_VALUES_NOT_MATCHING = "Values in training and test data partition columns should be same."
+    PARTITION_VALUES_NOT_MATCHING = "Values in {} and {} data partition columns should be same."
     PARTITION_IN_BOTH_FIT_AND_PREDICT = "Use \"partition_columns\" only if model is fitted with partition_column(s)."
     INVALID_PARTITIONING_COLS = "Provided partition_column(s) '{}' is/are not present in parent of '{}' DataFrame(s)."
     PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
     TARGET_COL_NOT_FOUND_FOR_EVALUATE = "Target column '{}' not found in the passed dataFrame. "\
-                                        "evaluate() requires target column to be present in the dataFrame."
+                                        "evaluate() requires target column to be present in the dataFrame."
+    AUTH_TOKEN_REQUIRED = "Authentication token is required to run '{}'. Set the token using set_auth_token()."
teradataml/common/messages.py

@@ -88,6 +88,7 @@ class Messages():
         [ErrorInfoCodes.TDMLDF_INVALID_JOIN_CONDITION, MessageCodes.TDMLDF_INVALID_JOIN_CONDITION],
         [ErrorInfoCodes.TDMLDF_INVALID_TABLE_ALIAS, MessageCodes.TDMLDF_INVALID_TABLE_ALIAS],
         [ErrorInfoCodes.TDMLDF_REQUIRED_TABLE_ALIAS, MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS],
+        [ErrorInfoCodes.TDMLDF_ALIAS_REQUIRED, MessageCodes.TDMLDF_ALIAS_REQUIRED],
         [ErrorInfoCodes.TDMLDF_COLUMN_ALREADY_EXISTS, MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS],
         [ErrorInfoCodes.INVALID_LENGTH_ARGS, MessageCodes.INVALID_LENGTH_ARGS],
         [ErrorInfoCodes.TDMLDF_AGGREGATE_UNSUPPORTED, MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED],
@@ -190,7 +191,8 @@ class Messages():
         [ErrorInfoCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT, MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT],
         [ErrorInfoCodes.INVALID_PARTITIONING_COLS, MessageCodes.INVALID_PARTITIONING_COLS],
         [ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND],
-        [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE]
+        [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE],
+        [ErrorInfoCodes.AUTH_TOKEN_REQUIRED, MessageCodes.AUTH_TOKEN_REQUIRED],
     ]

     @staticmethod
teradataml/common/sqlbundle.py

@@ -67,7 +67,8 @@ class SQLBundle:
             [SQLConstants.SQL_DELETE_ALL_ROWS, "DELETE FROM {0}"],
             [SQLConstants.SQL_DELETE_SPECIFIC_ROW, "DELETE FROM {0} WHERE {1}"],
             [SQLConstants.SQL_CREATE_TABLE_USING_COLUMNS, "CREATE MULTISET TABLE {0}( {1} )"],
-            [SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"]
+            [SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"],
+            [SQLConstants.SQL_SELECT_COLUMNNAMES_WITH_WHERE, "sel {0} from {1} where {2}"],

         ]
         self._add_sql_version()
@@ -446,11 +447,32 @@ class SQLBundle:
         query = sqlbundle._get_sql_query(SQLConstants.SQL_SELECT_DATABASENAME).format(schema_name)
         if table_name:
             if '%' in table_name:
-                query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
+                # Check if '%' is present in the between of table name excluding first and last character
+                # as '%' can be present at the start or end of table name
+                # and replace single quotes with empty string
+                # Checking for following cases with % in between table name:
+                # eg: table_name = 'ab%c', '%a%bc', '%ab%c%' or 'ab%c%'
+
+                # table_name[1:-1] - Removing single quotes from table name
+                # table_name[1:-1][1:-1] - Removing first and last character from table_name[1:-1]
+                if '%' in table_name[1:-1][1:-1]:
+                    query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
+                else:
+                    # Checking for following cases with % at the start or end of table name:
+                    # eg: table_name = '%abc', 'abc%', '%abc%'
+                    # Extracting table name without '%' character
+                    table_name_str = table_name.replace('%', '')
+
+                    # Adding condition to check if table name contains the string using POSITION function
+                    # POSITION function returns the position index of the substring in the string if found,
+                    # else returns 0
+                    query = "{0}{1}{2}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name), \
+                                               " AND POSITION({0} IN TABLENAME) > 0".format(table_name_str))
             else:
-                query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME).format(table_name))
+                query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
         if table_kind:
             query = '{0}{1}'.format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_KIND).format(table_kind))
+
         return query

     # TODO :: Following SQLConstants needs to be implemented as and when needed.
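The new branching distinguishes a '%' used only as a leading or trailing wildcard from one embedded inside the name. Because table_name arrives as a single-quoted literal, the first [1:-1] strips the quotes and the second strips a possible leading/trailing '%', so any '%' that survives must be interior. A self-contained restatement of just that test:

def has_inner_wildcard(quoted_name):
    # quoted_name is a single-quoted literal, e.g. "'%abc%'".
    # First [1:-1] strips the quotes; second [1:-1] strips a possible
    # leading/trailing '%', leaving only interior characters.
    return '%' in quoted_name[1:-1][1:-1]

for name in ("'ab%c'", "'%a%bc'", "'%abc'", "'abc%'", "'%abc%'"):
    print(name, has_inner_wildcard(name))
# 'ab%c' and '%a%bc' keep the plain LIKE predicate (True);
# '%abc', 'abc%' and '%abc%' take the LIKE + POSITION branch (False).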
teradataml/common/utils.py

@@ -13,6 +13,7 @@ by other classes which can be reused according to the need.
 Add all the common functions in this class like creating temporary table names, getting
 the datatypes etc.
 """
+from inspect import getsource
 import json
 import uuid
 from math import floor
@@ -43,6 +44,7 @@ from teradataml.options.configure import configure
 from teradataml.options.display import display
 from teradataml.common.constants import TeradataReservedKeywords, TeradataConstants

+from teradataml.utils.internal_buffer import _InternalBuffer
 from teradatasqlalchemy.types import _TDType
 from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT,
                                       DECIMAL, FLOAT, NUMBER)
@@ -1249,6 +1251,41 @@ class UtilFuncs():
             return UtilFuncs._teradata_quote_arg(keyword, "\"", False)

         return keyword
+
+    def _contains_space(item):
+        """
+        Check if the specified string in item has spaces or tabs in it.
+
+        PARAMETERS:
+            item:
+                Required Argument.
+                Specifies a string to check for spaces or tabs.
+                Types: str
+
+        RETURNS:
+            True, if the specified string has spaces or tabs in it, else False.
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            # Passing column name with spaces returns True.
+            is_space = UtilFuncs._contains_space("col name")
+            print(is_space)
+            # Passing column name without spaces returns False.
+            is_space = UtilFuncs._contains_space("colname")
+            print(is_space)
+        """
+        # Check if the input is a string and look for spaces or tabs
+        if isinstance(item, str):
+            return any(char in {' ', '\t'} for char in item)
+
+        # If the input is a list, check each element
+        if isinstance(item, list):
+            # Check each item in the list
+            return any(UtilFuncs._contains_space(col) for col in item)
+
+        return False

     @staticmethod
     def _in_schema(schema_name, table_name):
@@ -2358,8 +2395,16 @@ class UtilFuncs():
        >>> self._is_lake()
        """

-        from teradataml.context.context import _get_database_version
-        return int(_get_database_version().split(".")[0]) >= 20
+        tbl_operator = configure.table_operator.lower() \
+            if configure.table_operator else None
+
+        # If the user does not provide a table_operator, check the database version
+        # and determine the system type accordingly.
+        if tbl_operator is None:
+            from teradataml.context.context import _get_database_version
+            return int(_get_database_version().split(".")[0]) >= 20
+
+        return tbl_operator == "apply"

     @staticmethod
     def _get_python_execution_path():
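With this change, configure.table_operator, when set, decides the platform outright, and the database version is consulted only as a fallback. A condensed standalone restatement of the decision (the version string format shown is an assumption):

def is_lake(table_operator, database_version):
    # table_operator: the user-set configure option, or None when unset;
    # database_version: a dotted version string such as "20.00.01".
    if table_operator is None:
        return int(database_version.split(".")[0]) >= 20
    # Any value other than "apply" is treated as a non-Lake system.
    return table_operator.lower() == "apply"

print(is_lake(None, "17.20.03"))   # False - version below 20
print(is_lake(None, "20.00.01"))   # True  - version 20 or later
print(is_lake("apply", "17.20"))   # True  - explicit override wins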
@@ -2379,8 +2424,8 @@ class UtilFuncs():
         EXAMPLES:
            >>> self._get_python_execution_path()
         """
-        # 'indb_install_location' expects python installation directory path.
-        # Hence, postfixing python binary path.
+        # 'indb_install_location' expects python installation directory path.
+        # Hence, postfixing python binary path.
         return "python" if UtilFuncs._is_lake() else \
             '{}/bin/python3'.format(configure.indb_install_location)

@@ -2403,7 +2448,7 @@ class UtilFuncs():
        """
         db_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_db_name(tablename), "\"")
         table_view_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_table_name(tablename), "\"")
-        query = SQLBundle._build_select_table_kind(db_name, "{0}".format(table_view_name), "'V'")
+        query = SQLBundle._build_select_table_kind(db_name, "'{0}'".format(table_view_name), "'V'")

         df = UtilFuncs._execute_query(query)
         if len(df) > 0:
@@ -2471,7 +2516,7 @@ class UtilFuncs():
         except Exception as exc:
             raise exc

-    def _get_env_name(col):
+    def _get_env_name(col=None):
        """
        DESCRIPTION:
            Internal function to get the env name if passed with ColumnExpression
@@ -2479,9 +2524,10 @@ class UtilFuncs():

        PARAMETERS:
            col:
-               Required Argument.
+               Optional Argument.
                Specifies teradataml DataFrame ColumnExpression.
                Types: teradataml DataFrame ColumnExpression
+               Default Value: None

        RAISES:
            None.
@@ -2493,10 +2539,10 @@ class UtilFuncs():
            >>> self._get_env_name(col)
        """

-        # If env_name is passed with ColumnExpression fetch the env name,
+        # If ColumnExpression is passed and env_name is passed with it fetch the env name,
         # else check if default "openml_user_env" env is configured or not,
         # else get the default "openml_env" env if exists or create new deafult env.
-        if col._env_name is not None:
+        if col and col._env_name is not None:
             from teradataml.scriptmgmt.UserEnv import UserEnv
             env = col._env_name
             env_name = env.env_name if isinstance(col._env_name, UserEnv) else env
@@ -2506,5 +2552,84 @@ class UtilFuncs():
             env_name = UtilFuncs._create_or_get_env("open_source_ml.json").env_name
         return env_name

+    def _func_to_string(user_functions):
+        """
+        DESCRIPTION:
+            Internal function to get the user functions in a single string format.
+
+        PARAMETERS:
+            user_functions:
+                Required Argument.
+                List of user functions.
+                Types: list
+
+        RAISES:
+            None.
+
+        RETURNS:
+            string
+
+        EXAMPLES:
+            >>> from teradataml.dataframe.functions import udf
+            >>> @udf(returns=VARCHAR())
+            ... def sum(x, y):
+            ...     return x+y
+            >>>
+            >>> def to_upper(s):
+            ...     return s.upper()
+            >>> user_functions = [sum(1,2)._udf, to_upper]
+            >>> res = self._func_to_string(user_functions)
+            >>> print(res)
+            def sum(x, y):
+                return x+y
+
+            def to_upper(s):
+                return s.upper()
+
+            >>>
+        """
+        user_function_code = ""
+        for func in user_functions:
+            # Get the source code of the user function.
+            func = getsource(func)
+            # If the function have any extra space in the beginning remove it.
+            func = func.lstrip()
+            # Function can have decorator,e.g. udf as decorator, remove it.
+            if func.startswith("@"):
+                func = func[func.find("\n")+1: ].lstrip()
+            # If multiple functions are passed, separate them with new line.
+            user_function_code += func + '\n'
+        return user_function_code
+
+    @staticmethod
+    def _get_qualified_table_name(schema_name, table_name):
+        """
+        DESCRIPTION:
+            Internal function to get the fully qualified name of table.
+
+        PARAMETERS:
+            schema_name:
+                Required Argument.
+                Specifies the name of the schema.
+                Types: str
+
+            table_name:
+                Required Argument.
+                Specifies the name of the table.
+                Types: str
+
+        RAISES:
+            None.
+
+        RETURNS:
+            string
+
+        EXAMPLES:
+            >>> UtilFuncs._get_qualified_table_name("schema_name", "table_name")
+            '"schema_name"."table_name"'
+        """
+        return '"{}"."{}"'.format(schema_name, table_name)
+
+
 from teradataml.common.aed_utils import AedUtils
 from teradataml.dbutils.filemgr import remove_file
teradataml/context/context.py

@@ -1010,19 +1010,21 @@ def _get_host():
     EXAMPLES:
         _get_host()
     """
-    global td_sqlalchemy_engine
-    return td_sqlalchemy_engine.url.host
+    if td_connection is None:
+        return None
+    else:
+        return td_sqlalchemy_engine.url.host

 def _get_host_ip():
     """
     DESCRIPTION:
-        Function to return the host IP address.
+        Function to return the host IP address or host name associated with the current context.

     PARAMETERS:
         None.

     RETURNS:
-        Host IP address.
+        Host IP address or host name associated with the current context.

     RAISES:
         None.
@@ -1034,21 +1036,29 @@ def _get_host_ip():
     if td_connection is None:
         return None

-    host_ip = _get_host()
+    host = _get_host()
     try:
         # Validate if host_ip is a valid IP address (IPv4 or IPv6)
-        ipaddress.ip_address(host_ip)
+        ipaddress.ip_address(host)
+        return host
     except ValueError:
         # If host is not an IP address, get the IP address by DNS name from _InternalBuffer.
-        host_ip = _InternalBuffer.get('dns_host_ip')
-        if host_ip is None:
-            global td_sqlalchemy_engine
+        dns_host_ip = _InternalBuffer.get('dns_host_ip')
+        if dns_host_ip:
+            return dns_host_ip
+
+        # If DNS host ip not found, resolve the host name to get the IP address.
+        # If there is issue in resolving the host name, it will proceed with DNS host as it is.
+        try:
             # Get the list of addresses(compatible for both IPv4 and IPv6)
-            addr_info = socket.getaddrinfo(td_sqlalchemy_engine.url.host, None)
+            addr_info = socket.getaddrinfo(host, None)
             # Pick the first address from the list
             host_ip = addr_info[0][4][0]
             # Add the DNS host IP to the _InternalBuffer.
             _InternalBuffer.add(dns_host_ip=host_ip)
+        except socket.gaierror:
+            # Use dns host as it is
+            host_ip = host
     return host_ip

 class ContextUtilFuncs():
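The reworked _get_host_ip() resolves in a fixed order: return the host unchanged when it is already an IP literal, then the cached 'dns_host_ip' from _InternalBuffer, then a fresh socket.getaddrinfo() lookup (cached on success), and finally the raw host name when resolution fails. A minimal standalone sketch of the same cascade using only the standard library (a plain dict stands in for _InternalBuffer):

import ipaddress
import socket

_cache = {}  # stand-in for teradataml's _InternalBuffer

def resolve_host_ip(host):
    try:
        ipaddress.ip_address(host)   # already an IPv4/IPv6 literal
        return host
    except ValueError:
        pass
    if _cache.get('dns_host_ip'):    # previously resolved DNS name
        return _cache['dns_host_ip']
    try:
        # getaddrinfo returns (family, type, proto, canonname, sockaddr)
        # tuples; sockaddr[0] is the address, for IPv4 and IPv6 alike.
        ip = socket.getaddrinfo(host, None)[0][4][0]
        _cache['dns_host_ip'] = ip
        return ip
    except socket.gaierror:
        return host                  # fall back to the DNS name as-is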
teradataml/data/SQL_Fundamentals.pdf: Binary file (no textual diff shown).
teradataml/data/dataframe_example.json

@@ -152,6 +152,22 @@
         "item" : "varchar(20)",
         "sku" : "integer",
         "category" : "varchar(20)"
-
-    }
+    },
+    "medical_readings": {
+        "patient_id": "BIGINT",
+        "record_timestamp": "timestamp",
+        "glucose": "BIGINT",
+        "blood_pressure": "BIGINT",
+        "insulin": "BIGINT",
+        "diabetes_pedigree_function": "FLOAT",
+        "outcome": "BIGINT"
+    },
+    "patient_profile": {
+        "patient_id": "BIGINT",
+        "record_timestamp": "timestamp",
+        "pregnancies": "BIGINT",
+        "age": "BIGINT",
+        "bmi": "FLOAT",
+        "skin_thickness": "FLOAT"
+    }
 }
teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py

@@ -131,7 +131,7 @@ def NaiveBayes(data = None, response_column = None, numeric_inputs = None,
         display_analytic_functions()

         # Import function NaiveBayes.
-        from teradataml import NaiveBayes
+        from teradataml import NaiveBayes, Unpivoting

         # Example 1: NaiveBayes function to generate classification model using Dense input.
         NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
teradataml/data/docs/sqle/docs_17_20/Shap.py

@@ -1,4 +1,4 @@
-def Shap(data = None, object = None, training_function = "TD_GLM",
+def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM",
          model_type = "Regression", input_columns = None, detailed = False,
          accumulate = None, num_parallel_trees = 1000, num_boost_rounds = 10,
          **generic_arguments):
@@ -19,6 +19,12 @@ def Shap(data = None, object = None, training_function = "TD_GLM",
             Required Argument.
             Specifies the teradataml DataFrame containing the model data.
             Types: teradataml DataFrame
+
+        id_column:
+            Required Argument.
+            Specifies the input data column name that has the unique identifier
+            for each row in the "data".
+            Types: str

         training_function:
             Required Argument.
teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py

@@ -133,10 +133,10 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
         # Check the list of available analytic functions.
         display_analytic_functions()

-        # Import function NaiveBayesPredict.
-        from teradataml import NaiveBayesPredict
+        # Import function TDNaiveBayesPredict.
+        from teradataml import TDNaiveBayesPredict, NaiveBayes, Unpivoting

-        # Example 1: NaiveBayesPredict function to predict the classification label using Dense input.
+        # Example 1: TDNaiveBayesPredict function to predict the classification label using Dense input.
         NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
                                     numeric_inputs=['price','lotsize','bedrooms','bathrms','stories','garagepl'],
                                     categorical_inputs=['driveway','recroom','fullbase','gashw','airco','prefarea'])
@@ -152,7 +152,7 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
         # Print the result DataFrame.
         print( NaiveBayesPredict_out.result)

-        # Example 2: NaiveBayesPredict function to predict the classification label using Sparse input.
+        # Example 2: TDNaiveBayesPredict function to predict the classification label using Sparse input.

         # Unpivoting the data for sparse input to naive bayes.
         upvt_train = Unpivoting(data = housing_train, id_column = 'sn',