PyPI - teradataml - Versions diffs - 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl - Mend

teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of teradataml might be problematic. Click here for more details.

Files changed (96) — hide / show

teradataml/README.md +210 -0
teradataml/__init__.py +1 -1
teradataml/_version.py +1 -1
teradataml/analytics/analytic_function_executor.py +162 -76
teradataml/analytics/byom/__init__.py +1 -1
teradataml/analytics/json_parser/__init__.py +2 -0
teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
teradataml/analytics/json_parser/metadata.py +22 -4
teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
teradataml/analytics/sqle/__init__.py +3 -0
teradataml/analytics/utils.py +4 -1
teradataml/automl/__init__.py +2369 -464
teradataml/automl/autodataprep/__init__.py +15 -0
teradataml/automl/custom_json_utils.py +184 -112
teradataml/automl/data_preparation.py +113 -58
teradataml/automl/data_transformation.py +154 -53
teradataml/automl/feature_engineering.py +113 -53
teradataml/automl/feature_exploration.py +548 -25
teradataml/automl/model_evaluation.py +260 -32
teradataml/automl/model_training.py +399 -206
teradataml/clients/auth_client.py +2 -2
teradataml/common/aed_utils.py +11 -2
teradataml/common/bulk_exposed_utils.py +4 -2
teradataml/common/constants.py +62 -2
teradataml/common/garbagecollector.py +50 -21
teradataml/common/messagecodes.py +47 -2
teradataml/common/messages.py +19 -1
teradataml/common/sqlbundle.py +23 -6
teradataml/common/utils.py +116 -10
teradataml/context/aed_context.py +16 -10
teradataml/data/Employee.csv +5 -0
teradataml/data/Employee_Address.csv +4 -0
teradataml/data/Employee_roles.csv +5 -0
teradataml/data/JulesBelvezeDummyData.csv +100 -0
teradataml/data/byom_example.json +5 -0
teradataml/data/creditcard_data.csv +284618 -0
teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
teradataml/data/load_example_data.py +29 -11
teradataml/data/payment_fraud_dataset.csv +10001 -0
teradataml/data/teradataml_example.json +67 -0
teradataml/dataframe/copy_to.py +714 -54
teradataml/dataframe/dataframe.py +1153 -33
teradataml/dataframe/dataframe_utils.py +8 -3
teradataml/dataframe/functions.py +168 -1
teradataml/dataframe/setop.py +4 -1
teradataml/dataframe/sql.py +141 -9
teradataml/dbutils/dbutils.py +470 -35
teradataml/dbutils/filemgr.py +1 -1
teradataml/hyperparameter_tuner/optimizer.py +456 -142
teradataml/lib/aed_0_1.dll +0 -0
teradataml/lib/libaed_0_1.dylib +0 -0
teradataml/lib/libaed_0_1.so +0 -0
teradataml/lib/libaed_0_1_aarch64.so +0 -0
teradataml/scriptmgmt/UserEnv.py +234 -34
teradataml/scriptmgmt/lls_utils.py +43 -17
teradataml/sdk/_json_parser.py +1 -1
teradataml/sdk/api_client.py +9 -6
teradataml/sdk/modelops/_client.py +3 -0
teradataml/series/series.py +12 -7
teradataml/store/feature_store/constants.py +601 -234
teradataml/store/feature_store/feature_store.py +2886 -616
teradataml/store/feature_store/mind_map.py +639 -0
teradataml/store/feature_store/models.py +5831 -214
teradataml/store/feature_store/utils.py +390 -0
teradataml/table_operators/table_operator_util.py +1 -1
teradataml/table_operators/templates/dataframe_register.template +6 -2
teradataml/table_operators/templates/dataframe_udf.template +6 -2
teradataml/utils/docstring.py +527 -0
teradataml/utils/dtypes.py +93 -0
teradataml/utils/internal_buffer.py +2 -2
teradataml/utils/utils.py +41 -2
teradataml/utils/validators.py +694 -17
{teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
{teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
{teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
{teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
{teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0

teradataml/automl/autodataprep/__init__.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# ##################################################################
+#
+# Copyright 2025 Teradata. All rights reserved.
+# TERADATA CONFIDENTIAL AND TRADE SECRET
+#
+# Primary Owner: Sweta Shaw
+# Email Id: Sweta.Shaw@Teradata.com
+#
+# Secondary Owner: Akhil Bisht
+# Email Id: AKHIL.BISHT@Teradata.com
+#
+# Version: 1.1
+# Function Version: 1.0
+# ##################################################################
 # External libraries
 import pandas as pd

teradataml/automl/custom_json_utils.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # ##################################################################
 #
-# Copyright 2024 Teradata. All rights reserved.
+# Copyright 2025 Teradata. All rights reserved.
 # TERADATA CONFIDENTIAL AND TRADE SECRET
 #
 # Primary Owner: Sweta Shaw
@@ -14,15 +14,21 @@
 # ##################################################################
 import json
+from teradataml.common.constants import AutoMLConstants
 class _GenerateCustomJson:
-    def __init__(self):
+    def __init__(self, cluster=False):
         """
         DESCRIPTION:
             Function initializes the data and flags for custom JSON file generation.
+        PARAMETERS:
+            cluster:
+                Optional Argument.
+                Specifies whether to apply clustering techniques.
+                Default Value: False
+                Types: bool
         """
         # Initializing data dictionary for storing custom parameters
         self.data = {}
@@ -30,6 +36,7 @@ class _GenerateCustomJson:
         self.fe_flag = {index : False for index in range(1, 8)}
         self.de_flag = {index : False for index in range(1, 5)}
         self.mt_flag = {index : False for index in range(1, 2)}
+        self.cluster = cluster
     def _process_list_input(self,
                             input_data,
@@ -280,13 +287,21 @@ class _GenerateCustomJson:
         """
         print("\nCustomizing Data Preparation Phase ...")
         # Available options for customization of data preparation phase
-        dp_customize_options = {
-            1: 'Customize Data Imbalance Handling',
-            2: 'Customize Outlier Handling',
-            3: 'Customize Feature Scaling',
-            4: 'Back to main menu',
-            5: 'Generate custom json and exit'
+        if self.cluster:
+            dp_customize_options = {
+                1: 'Customize Outlier Handling',
+                2: 'Customize Feature Scaling',
+                3: 'Back to main menu',
+                4: 'Generate custom json and exit'
             }
+        else:
+            dp_customize_options = {
+                1: 'Customize Data Imbalance Handling',
+                2: 'Customize Outlier Handling',
+                3: 'Customize Feature Scaling',
+                4: 'Back to main menu',
+                5: 'Generate custom json and exit'
+                }
         while True:
@@ -296,19 +311,26 @@ class _GenerateCustomJson:
                 print(f"\nIndex {index}: {options}")
             print("-"*80)
             # Mapping each index to corresponding functionality
-            de_method_map = {
-                1: self._get_customize_input_data_imbalance_handling,
-                2: self._get_customize_input_outlier_handling,
-                3: self._get_customize_input_feature_scaling
-            }
+            if self.cluster:
+                de_method_map = {
+                    1: self._get_customize_input_outlier_handling,
+                    2: self._get_customize_input_feature_scaling
+                }
+                de_back_key, de_exit_key = 3, 4
+            else:
+                de_method_map = {
+                    1: self._get_customize_input_data_imbalance_handling,
+                    2: self._get_customize_input_outlier_handling,
+                    3: self._get_customize_input_feature_scaling
+                }
+                de_back_key, de_exit_key = 4, 5
             # Taking required input for customizing data preparation.
             dp_phase_idx = self._process_list_input(
                 input("\nEnter the list of indices you want to customize in data preparation phase: "),
                 'int', list(dp_customize_options.keys()))
             # Setting back_key and exit_key
-            de_back_key, de_exit_key = 4, 5
             # Flag variable to back to main menu
             de_exit_to_main_flag = False
             # Flag variable to exit from main menu
@@ -664,7 +686,6 @@ class _GenerateCustomJson:
         self._set_generic_arguement(func_name='StringManipulationParam')
         print("\nCustomization of string manipulation has been completed successfully.")
     def _get_customize_input_categorical_encoding(self,
                                                   first_execution_flag=False):
         """
@@ -927,8 +948,7 @@ class _GenerateCustomJson:
         self.data['DataImbalanceMethod'] = sampling_methods[sampling_mthd_idx]
         print("\nCustomization of data imbalance handling has been completed successfully.")
     def _get_customize_input_outlier_handling(self,
                                               first_execution_flag=False):
         """
@@ -946,13 +966,30 @@ class _GenerateCustomJson:
         if first_execution_flag:
             print("\nWARNING : Reinitiated outlier handling customization. "
                   "Overwriting the previous input.")
-            keys_to_remove = ['OutlierLowerPercentile', 'OutlierUpperPercentile']
+            keys_to_remove = ['OutlierLowerPercentile', 'OutlierUpperPercentile', 'OutlierFilterMethod', 'OutlierFilterParam']
             for key in keys_to_remove:
                 if key in self.data:
                     del self.data[key]
         print("\nCustomizing Outlier Handling ...")
+        apply_outlier_options = {1: 'Yes', 2: 'No'}
+        print("\nDo you want to apply outlier filtering?")
+        for idx, val in apply_outlier_options.items():
+            print(f"Index {idx}: {val}")
+        user_choice = self._process_single_input(
+            input("\nEnter the index of your choice (1 for Yes, 2 for No): "),
+            'int',
+            list(apply_outlier_options.keys())
+        )
+        if user_choice == 2:
+            self.data['OutlierFilterIndicator'] = False
+            print("\nSkipping outlier filtering as per user choice.")
+            return
         # Setting indicator for outlier handling
         self.data['OutlierFilterIndicator'] = True
         outlier_methods = {1: 'percentile',
@@ -1127,52 +1164,68 @@ class _GenerateCustomJson:
             Allowed hyperparameters for model.
         """
         # Setting allowed common hyperparameters for tree like model
-        allowed_common_hyperparameters_tree_model ={
-            1 : 'min_impurity',
-            2 : 'max_depth',
-            3 : 'min_node_size',
-        }
-        # Setting allowed hyperparameters for xgbooost model
-        allowed_hyperparameters_xgboost = {
-            **allowed_common_hyperparameters_tree_model,
-            4 : 'shrinkage_factor',
-            5 : 'iter_num'
-        }
-        # Setting allowed hyperparameters for decision forest model
-        allowed_hyperparameters_decision_forest = {
-            **allowed_common_hyperparameters_tree_model,
-            4 : 'num_trees'
-        }
-        # Setting allowed hyperparameters for knn model
-        allowed_hyperparameters_knn = {
-            0 : 'k'
-        }
-        # Setting allowed hyperparameters for svm model
-        allowed_hyperparameters_svm = {
-            1 : 'alpha',
-            2 : 'learning_rate',
-            3 : 'initial_eta',
-            4 : 'momentum',
-            5 : 'iter_num_no_change',
-            6 : 'iter_max',
-            7 : 'batch_size'
-        }
-        # Setting allowed hyperparameters for glm model
-        allowed_hyperparameters_glm = {
-            **allowed_hyperparameters_svm,
-            8 : 'tolerance',
-            9 : 'nesterov',
-            10 : 'intercept',
-            11 : 'local_sgd_iterations'
-        }
-        # Setting allowed hyperparameters for different models
-        allowed_hyperparameters = {
-            'xgboost' : allowed_hyperparameters_xgboost,
-            'decision_forest' : allowed_hyperparameters_decision_forest,
-            'knn' : allowed_hyperparameters_knn,
-            'svm' : allowed_hyperparameters_svm,
-            'glm' : allowed_hyperparameters_glm
-        }
+        if self.cluster:
+            allowed_hyperparameters_kmeans ={
+                1 : 'n_clusters',
+                2 : 'init',
+                3 : 'max_iter',
+            }
+            allowed_hyperparameters_gaussian_mixture ={
+                1 : 'n_components',
+                2 : 'covariance_type',
+                3 : 'max_iter',
+            }
+            allowed_hyperparameters = {
+                'KMeans' : allowed_hyperparameters_kmeans,
+                'GaussianMixture' : allowed_hyperparameters_gaussian_mixture,
+            }
+        else:
+            allowed_common_hyperparameters_tree_model ={
+                1 : 'min_impurity',
+                2 : 'max_depth',
+                3 : 'min_node_size',
+            }
+            # Setting allowed hyperparameters for xgbooost model
+            allowed_hyperparameters_xgboost = {
+                **allowed_common_hyperparameters_tree_model,
+                4 : 'shrinkage_factor',
+                5 : 'iter_num'
+            }
+            # Setting allowed hyperparameters for decision forest model
+            allowed_hyperparameters_decision_forest = {
+                **allowed_common_hyperparameters_tree_model,
+                4 : 'num_trees'
+            }
+            # Setting allowed hyperparameters for knn model
+            allowed_hyperparameters_knn = {
+                0 : 'k'
+            }
+            # Setting allowed hyperparameters for svm model
+            allowed_hyperparameters_svm = {
+                1 : 'alpha',
+                2 : 'learning_rate',
+                3 : 'initial_eta',
+                4 : 'momentum',
+                5 : 'iter_num_no_change',
+                6 : 'iter_max',
+                7 : 'batch_size'
+            }
+            # Setting allowed hyperparameters for glm model
+            allowed_hyperparameters_glm = {
+                **allowed_hyperparameters_svm,
+                8 : 'tolerance',
+                9 : 'nesterov',
+                10 : 'intercept',
+                11 : 'local_sgd_iterations'
+            }
+            # Setting allowed hyperparameters for different models
+            allowed_hyperparameters = {
+                'xgboost' : allowed_hyperparameters_xgboost,
+                'decision_forest' : allowed_hyperparameters_decision_forest,
+                'knn' : allowed_hyperparameters_knn,
+                'svm' : allowed_hyperparameters_svm,
+                'glm' : allowed_hyperparameters_glm
+            }
         return allowed_hyperparameters[model_name]
     def _get_allowed_hyperparameters_types(self, hyperparameter):
@@ -1190,26 +1243,35 @@ class _GenerateCustomJson:
             Allowed hyperparameters types for hyperparameter.
         """
         # Setting allowed hyperparameters types for different hyperparameters
-        allowed_hyperparameters_types = {
-            'min_impurity' : 'float',
-            'max_depth' : 'int',
-            'min_node_size' : 'int',
-            'shrinkage_factor' : 'float',
-            'iter_num' : 'int',
-            'num_trees' : 'int',
-            'k' : 'int',
-            'alpha' : 'float',
-            'learning_rate' : 'str',
-            'initial_eta' : 'float',
-            'momentum' : 'float',
-            'iter_num_no_change' : 'int',
-            'iter_max' : 'int',
-            'batch_size' : 'int',
-            'tolerance' : 'float',
-            'nesterov' : 'bool',
-            'intercept' : 'bool',
-            'local_sgd_iterations' : 'int'
-        }
+        if self.cluster:
+            allowed_hyperparameters_types = {
+                'n_clusters': 'int',
+                'init': 'str',
+                'max_iter': 'int',
+                'n_components': 'int',
+                'covariance_type': 'str'
+            }
+        else:
+            allowed_hyperparameters_types = {
+                'min_impurity' : 'float',
+                'max_depth' : 'int',
+                'min_node_size' : 'int',
+                'shrinkage_factor' : 'float',
+                'iter_num' : 'int',
+                'num_trees' : 'int',
+                'k' : 'int',
+                'alpha' : 'float',
+                'learning_rate' : 'str',
+                'initial_eta' : 'float',
+                'momentum' : 'float',
+                'iter_num_no_change' : 'int',
+                'iter_max' : 'int',
+                'batch_size' : 'int',
+                'tolerance' : 'float',
+                'nesterov' : 'bool',
+                'intercept' : 'bool',
+                'local_sgd_iterations' : 'int'
+            }
         return allowed_hyperparameters_types[hyperparameter]
     def _get_customize_input_model_hyperparameter(self,
@@ -1233,11 +1295,12 @@ class _GenerateCustomJson:
         # Setting indicator for model hyperparameter tuning
         self.data['HyperparameterTuningIndicator'] = True
         self.data['HyperparameterTuningParam'] = {}
-        all_models = {1: 'decision_forest',
-                      2: 'xgboost',
-                      3: 'knn',
-                      4: 'glm',
-                      5: 'svm'}
+        if self.cluster:
+            # Create numbered mapping for clustering models
+            all_models = {i+1: model for i, model in enumerate(AutoMLConstants.CLUSTERING_MODELS.value)}
+        else:
+            # Create numbered mapping for supervised models
+            all_models = {i+1: model for i, model in enumerate(AutoMLConstants.SUPERVISED_MODELS.value)}
         # Displaying available models for hyperparameter tuning
         print("\nAvailable models for hyperparameter tuning with corresponding indices:")
         for index, model in all_models.items():
@@ -1309,26 +1372,35 @@ class _GenerateCustomJson:
                 Types: str
         """
         # Setting example hyperparameter values for different hyperparameters
-        example_hyperparameters = {
-            'min_impurity' : ([0.1,0.6], 'float'),
-            'max_depth' : ([1,5,10], 'int'),
-            'min_node_size' : ([1,20,100], 'int'),
-            'num_trees' : ([10,50,100], 'int'),
-            'k' : ([5,25,100], 'int'),
-            'shrinkage_factor': ([0.1,0.5,1.0], 'float'),
-            'alpha' : ([0.1,0.5,1.0], 'float'),
-            'learning_rate' : (['constant','optimal','invtime','adaptive'], 'str'),
-            'initial_eta' : ([0.05,0.1], 'float'),
-            'momentum' : ([0.65,0.95], 'float'),
-            'iter_num_no_change' : ([25,50,100], 'int'),
-            'iter_max' : ([10,100,300], 'int'),
-            'batch_size' : ([10,50,100], 'int'),
-            'tolerance' : ([0.0001,0.01], 'float'),
-            'nesterov' : (['true','false'], 'bool'),
-            'intercept' : (['true','false'], 'bool'),
-            'local_sgd_iterations' : ([10,25,50], 'int'),
-            'iter_num' : ([10,50,100], 'int')
-        }
+        if self.cluster:
+            example_hyperparameters = {
+                'n_clusters': ([2, 3, 4], 'int'),
+                'init': (['k-means++', 'random'], 'str'),
+                'max_iter': ([100, 300], 'int'),
+                'n_components': ([2, 3, 4], 'int'),
+                'covariance_type': (['full', 'tied', 'diag', 'spherical'], 'str')
+            }
+        else:
+            example_hyperparameters = {
+                'min_impurity' : ([0.1,0.6], 'float'),
+                'max_depth' : ([1,5,10], 'int'),
+                'min_node_size' : ([1,20,100], 'int'),
+                'num_trees' : ([10,50,100], 'int'),
+                'k' : ([5,25,100], 'int'),
+                'shrinkage_factor': ([0.1,0.5,1.0], 'float'),
+                'alpha' : ([0.1,0.5,1.0], 'float'),
+                'learning_rate' : (['constant','optimal','invtime','adaptive'], 'str'),
+                'initial_eta' : ([0.05,0.1], 'float'),
+                'momentum' : ([0.65,0.95], 'float'),
+                'iter_num_no_change' : ([25,50,100], 'int'),
+                'iter_max' : ([10,100,300], 'int'),
+                'batch_size' : ([10,50,100], 'int'),
+                'tolerance' : ([0.0001,0.01], 'float'),
+                'nesterov' : (['true','false'], 'bool'),
+                'intercept' : (['true','false'], 'bool'),
+                'local_sgd_iterations' : ([10,25,50], 'int'),
+                'iter_num' : ([10,50,100], 'int')
+            }
         print(f"\nExample values for hyperparameter '{hyperparameter_name}' :")
         if hyperparameter_name in example_hyperparameters:

teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

Potentially problematic release.

teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl