PyPI - teradataml - Versions diffs - 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl - Mend

teradataml 20.0.0.3py3-none-any.whl → 20.0.0.5py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of teradataml might be problematic. Click here for more details.

Files changed (151) hide show

teradataml/LICENSE-3RD-PARTY.pdf +0 -0
teradataml/README.md +193 -1
teradataml/__init__.py +2 -1
teradataml/_version.py +2 -2
teradataml/analytics/analytic_function_executor.py +25 -18
teradataml/analytics/byom/__init__.py +1 -1
teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
teradataml/analytics/sqle/__init__.py +20 -2
teradataml/analytics/utils.py +15 -1
teradataml/analytics/valib.py +18 -4
teradataml/automl/__init__.py +341 -112
teradataml/automl/autodataprep/__init__.py +471 -0
teradataml/automl/data_preparation.py +84 -42
teradataml/automl/data_transformation.py +69 -33
teradataml/automl/feature_engineering.py +76 -9
teradataml/automl/feature_exploration.py +639 -25
teradataml/automl/model_training.py +35 -14
teradataml/clients/auth_client.py +2 -2
teradataml/common/__init__.py +1 -2
teradataml/common/constants.py +122 -63
teradataml/common/messagecodes.py +14 -3
teradataml/common/messages.py +8 -4
teradataml/common/sqlbundle.py +40 -10
teradataml/common/utils.py +366 -74
teradataml/common/warnings.py +11 -0
teradataml/context/context.py +348 -86
teradataml/data/amazon_reviews_25.csv +26 -0
teradataml/data/apriori_example.json +22 -0
teradataml/data/byom_example.json +11 -0
teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
teradataml/data/hnsw_alter_data.csv +5 -0
teradataml/data/hnsw_data.csv +10 -0
teradataml/data/jsons/byom/h2opredict.json +1 -1
teradataml/data/jsons/byom/onnxembeddings.json +266 -0
teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
teradataml/data/ner_dict.csv +8 -0
teradataml/data/ner_input_eng.csv +7 -0
teradataml/data/ner_rule.csv +5 -0
teradataml/data/pos_input.csv +40 -0
teradataml/data/tdnerextractor_example.json +14 -0
teradataml/data/teradataml_example.json +21 -0
teradataml/data/textmorph_example.json +5 -0
teradataml/data/to_num_data.csv +4 -0
teradataml/data/tochar_data.csv +5 -0
teradataml/data/trans_dense.csv +16 -0
teradataml/data/trans_sparse.csv +55 -0
teradataml/data/vectordistance_example.json +1 -1
teradataml/dataframe/copy_to.py +45 -29
teradataml/dataframe/data_transfer.py +72 -46
teradataml/dataframe/dataframe.py +642 -166
teradataml/dataframe/dataframe_utils.py +167 -22
teradataml/dataframe/functions.py +135 -20
teradataml/dataframe/setop.py +11 -6
teradataml/dataframe/sql.py +330 -78
teradataml/dbutils/dbutils.py +556 -140
teradataml/dbutils/filemgr.py +14 -10
teradataml/hyperparameter_tuner/optimizer.py +12 -1
teradataml/lib/aed_0_1.dll +0 -0
teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
teradataml/opensource/_class.py +141 -17
teradataml/opensource/{constants.py → _constants.py} +7 -3
teradataml/opensource/_lightgbm.py +52 -53
teradataml/opensource/_sklearn.py +1008 -0
teradataml/opensource/_wrapper_utils.py +5 -5
teradataml/options/__init__.py +47 -15
teradataml/options/configure.py +103 -26
teradataml/options/display.py +13 -2
teradataml/plot/axis.py +47 -8
teradataml/plot/figure.py +33 -0
teradataml/plot/plot.py +63 -13
teradataml/scriptmgmt/UserEnv.py +307 -40
teradataml/scriptmgmt/lls_utils.py +428 -145
teradataml/store/__init__.py +2 -3
teradataml/store/feature_store/feature_store.py +102 -7
teradataml/table_operators/Apply.py +48 -19
teradataml/table_operators/Script.py +23 -2
teradataml/table_operators/TableOperator.py +3 -1
teradataml/table_operators/table_operator_util.py +58 -9
teradataml/utils/dtypes.py +49 -1
teradataml/utils/internal_buffer.py +38 -0
teradataml/utils/validators.py +377 -62
{teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
{teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
teradataml/data/SQL_Fundamentals.pdf +0 -0
teradataml/libaed_0_1.dylib +0 -0
teradataml/libaed_0_1.so +0 -0
teradataml/opensource/sklearn/__init__.py +0 -0
teradataml/store/vector_store/__init__.py +0 -1586
{teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
{teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
{teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0

teradataml/automl/__init__.py CHANGED Viewed

@@ -30,7 +30,7 @@ from teradataml import ColumnExpression
 from teradataml.dataframe.dataframe import DataFrame
 from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
-from teradataml import ROC, BLOB
+from teradataml import ROC, BLOB, VARCHAR
 from teradataml.utils.dtypes import _Dtypes
 from teradataml.common.utils import UtilFuncs
 from teradataml import TeradataMlException
@@ -94,6 +94,9 @@ class AutoML:
             the processes by passing the JSON file path in case of custom run. It also
             supports early stopping of model training based on stopping metrics,
             maximum running time and maximum models to be trained.
+            Note:
+                * configure.temp_object_type="VT" follows sequential execution.
         PARAMETERS:
             task_type:
@@ -185,8 +188,17 @@ class AutoML:
                         results are persisted in a table; otherwise,
                         results are garbage collected at the end of the
                         session.
+                        Note:
+                            * User is responsible for cleanup of the persisted tables. List of persisted tables
+                              in current session can be viewed using get_persisted_tables() method.
                         Default Value: False
                         Types: bool
+                    seed:
+                        Optional Argument.
+                        Specifies the random seed for reproducibility.
+                        Default Value: 42
+                        Types: int
         RETURNS:
             Instance of AutoML.
@@ -417,9 +429,11 @@ class AutoML:
         volatile = kwargs.get('volatile', False)
         persist = kwargs.get('persist', False)
+        seed = kwargs.get('seed', 42)
         arg_info_matrix.append(["volatile", volatile, True, (bool)])
         arg_info_matrix.append(["persist", persist, True, (bool)])
+        arg_info_matrix.append(["seed", seed, True, (int)])
         # Validate argument types
         _Validators._validate_function_arguments(arg_info_matrix)
@@ -465,8 +479,13 @@ class AutoML:
         self._is_fit_called = False
         self._is_load_model_called = False
         self.kwargs = kwargs
-        self.table_name_mapping={}
+        self.table_name_mapping = {}
+        # Stores the table names of all intermediate data
+        self._intermediate_table_names={}
+        self._auto_dataprep = False
+        self._phases = None
+        self._progressbar_prefix = "AutoML Running:"
     @collect_queryband(queryband="AutoML_fit")
     def fit(self,
             data,
@@ -517,7 +536,7 @@ class AutoML:
         # Validate argument types
         _Validators._validate_function_arguments(arg_info_fit_matrix)
         # Initializing class variables
         self.data = data
         self.target_column = target_column
@@ -591,15 +610,25 @@ class AutoML:
         clf = task_cls(self.data, self.target_column, self.custom_data)
         self.model_info, self.leader_board, self.target_count, self.target_label, \
-            self.data_transformation_params, self.table_name_mapping = getattr(clf, cls_method)(
-                                                                       model_list = self.model_list,
-                                                                       auto = self.auto,
-                                                                       verbose = self.verbose,
-                                                                       max_runtime_secs = self.max_runtime_secs,
-                                                                       stopping_metric = self.stopping_metric,
-                                                                       stopping_tolerance = self.stopping_tolerance,
-                                                                       max_models = self.max_models,
-                                                                       **self.kwargs)
+            self.data_transformation_params, self._intermediate_table_names = getattr(clf, cls_method)(
+                                                                              model_list = self.model_list,
+                                                                              auto = self.auto,
+                                                                              verbose = self.verbose,
+                                                                              max_runtime_secs = self.max_runtime_secs,
+                                                                              stopping_metric = self.stopping_metric,
+                                                                              stopping_tolerance = self.stopping_tolerance,
+                                                                              max_models = self.max_models,
+                                                                              auto_dataprep = self._auto_dataprep,
+                                                                              automl_phases = self._phases,
+                                                                              progress_prefix = self._progressbar_prefix,
+                                                                              **self.kwargs)
+        # table_name_mapping stores the table names of the intermediate data (lasso, rfe, pca)
+        # used for training models
+        keys_to_extract = ['lasso_train', 'rfe_train', 'pca_train']
+        self.table_name_mapping = {key: self._intermediate_table_names[key] for key in keys_to_extract
+                                   if key in self._intermediate_table_names}
         # Model Evaluation Phase
         self.m_evaluator = _ModelEvaluator(self.model_info,
@@ -669,13 +698,9 @@ class AutoML:
             >>> prediction = automl_obj.predict(admissions_test, rank=3, use_loaded_models=True)
             >>> prediction
         """
-        # Checking if fit or load model is called before predict, If not raise error
-        if not self._is_fit_called and not self._is_load_model_called:
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'predict' method", \
-                                       "'fit' or 'load' method must be called before" \
-                                       " running predict.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+        # Raise error if neither fit nor load is called before predict
+        _Validators._validate_dependent_method("predict", ["fit", "load"],
+                                                [self._is_fit_called, self._is_load_model_called])
         # Appending predict arguments to list for validation.
         arg_info_pred_matrix = []
@@ -758,11 +783,12 @@ class AutoML:
         if self.target_column_ind:
             prediction_column = 'prediction' if 'prediction' in pred.result.columns else 'Prediction'
             probability_column = 'prob_1'
+            pred_target_count = pred.result.drop_duplicate(self.target_column).size
             # Displaying confusion matrix and ROC-AUC for classification problem
             if self.is_classification_type():
                 print_data = lambda data: print(data) if _is_terminal() else display(data)
                 # Displaying ROC-AUC for binary classification
-                if self.target_count == 2:
+                if self.target_count == 2 and pred_target_count == 2:
                     fit_params = {
                         "probability_column" : probability_column,
                         "observation_column" : self.target_column,
@@ -850,13 +876,10 @@ class AutoML:
             >>> evaluation = automl_obj.evaluate(admissions_test, rank=3, use_loaded_models=True)
             >>> evaluation
         """
-        if not self._is_fit_called and not self._is_load_model_called:
-            # raise ValueError("fit() method must be called before evaluating.")
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'evaluate' method", \
-                                       "'fit' or 'load' method must be called before" \
-                                       " running evaluate.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+        # Raising exception if fit or load model is not called before evaluate
+        _Validators._validate_dependent_method("evaluate", ["fit", "load"],
+                                               [self._is_fit_called, self._is_load_model_called])
         # Appending evaluate arguments to list for validation.
         arg_info_pred_matrix = []
         arg_info_pred_matrix.append(["data", data, False, (DataFrame), True])
@@ -886,8 +909,8 @@ class AutoML:
         # as it is required for evaluation.
         if self.target_column not in data.columns:
              raise TeradataMlException(
-                            Messages.get_message(MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE).format(self.target_column),
-                            MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE)
+                 Messages.get_message(MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE).format(self.target_column),
+                 MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE)
         # Checking if data is already transformed before or not
         data_node_id = data._nodeid
@@ -1005,13 +1028,9 @@ class AutoML:
             # Generate leaderboard using leaderboard() method on "automl_obj".
             >>> automl_obj.leaderboard()
         """
-        if not self._is_fit_called:
-            # raise ValueError("fit() method must be called before generating leaderboard.")
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'leaderboard' method", \
-                                       "'fit' method must be called before" \
-                                       " generating leaderboard.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+        # Raise error if fit is not called before leaderboard
+        _Validators._validate_dependent_method("leaderboard", "fit", self._is_fit_called)
         return self.leader_board
     @collect_queryband(queryband="AutoML_leader")
@@ -1034,13 +1053,9 @@ class AutoML:
             # Display best performing model using leader() method on "automl_obj".
             >>> automl_obj.leader()
         """
-        if not self._is_fit_called:
-            # raise ValueError("fit() method must be called before generating leader.")
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'leader' method", \
-                                       "'fit' method must be called before" \
-                                       " generating leader.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+        # Raise error if fit is not called before leader
+        _Validators._validate_dependent_method("leader", "fit", self._is_fit_called)
         record = self.leader_board
         if not _is_terminal():
             display(record[record['RANK'] == 1])
@@ -1113,13 +1128,9 @@ class AutoML:
             >>> automl_obj.model_hyperparameters(rank=1)
         """
-        if not self._is_fit_called and not self._is_load_model_called:
-            # raise ValueError("fit() or load() method must be called before getting hyperparameters.")
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'model_hyperparameters' method",
-                                       "No models available to get hyperparameters. " \
-                                       "Run 'fit()' or 'load()' methods to get models.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+        # Raise error if fit or load model is not called before model_hyperparameters
+        _Validators._validate_dependent_method("model_hyperparameters", ["fit", "load"],
+                                               [self._is_fit_called, self._is_load_model_called])
         arg_info_matrix = []
         arg_info_matrix.append(["rank", rank, True, (int), True])
@@ -1234,6 +1245,8 @@ class AutoML:
         pca.n_components_ = load_pca_info['n_components']
         pca.noise_variance_ = load_pca_info['noise_variance']
         pca.singular_values_ = np.array(load_pca_info['singular_values'])
+        pca.feature_names_in_ = data_params['pca_fit_columns']
+        pca.n_features_in_ = len(data_params['pca_fit_columns'])
         data_params['pca_fit_instance'] = pca
@@ -1256,28 +1269,18 @@ class AutoML:
         start_rank, end_rank = ranks.start, ranks.stop
         # Check if both parts are positive (non-zero) integers
-        if not (start_rank > 0 and end_rank > 0):
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'deploy' method", \
-                                       "Provided start and end rank in 'ranks' "\
-                                       "must be positive non-zero integers.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+        _Validators._validate_positive_int(start_rank, "ranks(start)")
+        _Validators._validate_positive_int(end_rank, "ranks(end)")
         # Check if start_rank is less than or equal to end_rank
         if start_rank > end_rank:
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'deploy' method", \
-                                       "Provided start rank in 'ranks' must be less than"\
-                                       " or equal to end rank in 'ranks'.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+            err = "Provided start rank in 'ranks' must be less than or equal to end rank in 'ranks'."
+            self._raise_error("deploy", err)
         # check end rank is less than or equal to total models
         if end_rank > self.leader_board.RANK.max():
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                       "'deploy' method", \
-                                       "Provided end rank in 'ranks' must be less than"\
-                                       " or equal to total models available.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+            err = "Provided end rank in 'ranks' must be less than or equal to total models available."
+            self._raise_error("deploy", err)
         return start_rank, end_rank
@@ -1342,12 +1345,7 @@ class AutoML:
             >>> obj.deploy("model_table", ranks=range(2,6))
         """
         # raise Error if fit is not called
-        if not self._is_fit_called:
-            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                                    "'deploy' method", \
-                                    "'fit' method must be called before" \
-                                    " 'deploy'.")
-            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+        _Validators._validate_dependent_method("deploy", "fit", self._is_fit_called)
         # Appending arguments to list for validation
         arg_info_matrix = []
@@ -1442,7 +1440,8 @@ class AutoML:
         # Saving data transformation parameters to the specified table
         sv_models = pd.concat([sv_models, df], ignore_index=True, sort=False)
-        copy_to_sql(df = sv_models, table_name=table_name, if_exists='replace', types={'DATA_PARAMS':BLOB})
+        copy_to_sql(df = sv_models, table_name=table_name, if_exists='replace', types={'DATA_PARAMS':BLOB,
+                                                                                       'PARAMETERS':VARCHAR(length=32000, charset='UNICODE')})
         print('Model Deployment Completed Successfully.')
@@ -1793,6 +1792,185 @@ class AutoML:
         db_drop_table(table_name)
+    @collect_queryband(queryband="AutoML_get_persisted_tables")
+    def get_persisted_tables(self):
+        """
+        DESCRIPTION:
+            Get the list of the tables that are persisted in the database.
+            Note:
+                * User is responsible for keeping track of the persistent tables
+                  and cleanup of the same if required.
+        PARAMETERS:
+            None
+        RETURNS:
+            Dictionary, containing the persisted table names, each mapped to the stage
+            at which it was generated.
+        RAISES:
+            TeradataMlException.
+        EXAMPLES:
+            # Create an instance of the AutoML called "obj"
+            # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
+            # 'persist' argument must be set to True in the AutoML object.
+            >>> obj = AutoML(verbose=2, max_models=10, persist=True)
+            # Load and fit the data.
+            >>> load_example_data("teradataml", "titanic")
+            >>> titanic_data = DataFrame("titanic")
+            >>> obj.fit(data = titanic_data, target_column = titanic_data.survived)
+            # Get the list of tables that are persisted in the database.
+            >>> obj.get_persisted_tables()
+        """
+        # Check if fit is called
+        _Validators._validate_dependent_method("get_persisted_tables", "fit", self._is_fit_called)
+        # check if persist is passed as argument and is set to True
+        persist_val = True if self.kwargs.get('persist', False) else None
+        _Validators._validate_dependent_argument("get_persisted_tables", True,
+                                                 "persist", persist_val,
+                                                 msg_arg_value='True')
+        # result table names
+        return self._intermediate_table_names
+    def _raise_error(self, method_name, error_msg):
+        """
+        DESCRIPTION:
+            Internal Function raises an error message when a method
+            fails to execute.
+        PARAMETERS:
+            method_name:
+                Required Argument.
+                Specifies the method name that failed to execute.
+                Types: str
+            error_msg:
+                Required Argument.
+                Specifies the error message to be displayed.
+                Types: str
+        RAISES:
+            TeradataMlException.
+        EXAMPLES:
+            >>> self._raise_error("deploy", "fit() method must be called before 'deploy'.")
+        """
+        err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
+                                   f'{method_name} method',
+                                   error_msg)
+        raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+    @staticmethod
+    def visualize(**kwargs):
+        """
+        DESCRIPTION:
+            Function visualizes the data using various plots such as heatmap,
+            pair plot, histogram, univariate plot, count plot, box plot, and target distribution.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input teradataml DataFrame for plotting.
+                Types: teradataml DataFrame
+            target_column:
+                Required Argument.
+                Specifies the name of the target column in "data".
+                Note:
+                    * "target_column" must be of numeric type.
+                Types: str
+            plot_type:
+                Optional Argument.
+                Specifies the type of plot to be displayed.
+                Default Value: "target"
+                Permitted Values:
+                    * "heatmap": Displays a heatmap of feature correlations.
+                    * "pair": Displays a pair plot of features.
+                    * "density": Displays a density plot of features.
+                    * "count": Displays a count plot of categorical features.
+                    * "box": Displays a box plot of numerical features.
+                    * "target": Displays the distribution of the target variable.
+                    * "all": Displays all the plots.
+                Types: str, list of str
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+            columns:
+                Optional Argument.
+                Specifies the column names to be used for plotting.
+                Types: str or list of string
+            max_features:
+                Optional Argument.
+                Specifies the maximum number of features to be used for plotting.
+                Default Value: 10
+                Note:
+                    * It applies separately to categorical and numerical features.
+                Types: int
+            problem_type:
+                Optional Argument.
+                Specifies the type of problem.
+                Permitted Values:
+                    * 'regression'
+                    * 'classification'
+                Types: str
+        RETURNS:
+            None
+        RAISES:
+            TeradataMlException.
+        EXAMPLES:
+            # Import either of AutoML or AutoClassifier or AutoRegressor or AutoDataPrep
+            # from teradataml.
+            >>> from teradataml import AutoML
+            >>> from teradataml import DataFrame
+            >>> load_example_data("teradataml", "titanic")
+            >>> titanic_data = DataFrame("titanic")
+            # Example 1: Visualize the data using AutoML class.
+            >>> AutoML.visualize(data = titanic_data,
+            ...                  target_column = 'survived',
+            ...                  plot_type = ['heatmap', 'pair', 'histogram', 'target'],
+            ...                  length = 10,
+            ...                  breadth = 8,
+            ...                  max_features = 10,
+            ...                  problem_type = 'classification')
+            # Example 2: Visualize the data using AutoDataPrep class.
+            >>> from teradataml import AutoDataPrep
+            >>> obj = AutoDataPrep(task_type="classification")
+            >>> obj.fit(data = titanic_data, target_column = 'survived')
+            # Retrieve the data from AutoDataPrep object.
+            >>> datas = obj.get_data()
+            >>> AutoDataPrep.visualize(data = datas['lasso_train'],
+            ...                        target_column = 'survived',
+            ...                        plot_type = 'all',
+            ...                        length = 20,
+            ...                        breadth = 15)
+        """
+        _FeatureExplore._visualize(**kwargs)
     @staticmethod
     def generate_custom_config(file_name = "custom"):
         """
@@ -1877,7 +2055,7 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
     def _regression(self,
-                    model_list = None,
+                    model_list=None,
                     auto = False,
                     verbose = 0,
                     max_runtime_secs = None,
@@ -1945,16 +2123,23 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
                 Default Value: False
                 Types: bool
+            seed:
+                Optional Argument.
+                Specifies the random seed for reproducibility.
+                Default Value: 42
+                Types: int
         RETURNS:
             a tuple containing model information, leaderboard, target count, target label, data transformation parameters and data mapping.
         """
         # Feature Exploration Phase
         _FeatureExplore.__init__(self,
                                  data = self.data,
                                  target_column = self.target_column,
                                  verbose=verbose)
         if verbose > 0:
-            self._exploration()
+            self._exploration(**kwargs)
         # Feature Engineering Phase
         _FeatureEngineering.__init__(self,
                                      data = self.data,
@@ -1965,7 +2150,8 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
                                      **kwargs)
         # Start time
         start_time = time.time()
-        data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
+        data, excluded_columns, target_label,\
+        data_transformation_params, data_mapping = self.feature_engineering(auto)
         # Data preparation Phase
         _DataPreparation.__init__(self,
@@ -1975,8 +2161,18 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
                                   excluded_columns = excluded_columns,
                                   custom_data = self.custom_data,
                                   data_transform_dict = data_transformation_params,
+                                  data_mapping = data_mapping,
                                   **kwargs)
-        features, data_transformation_params = self.data_preparation(auto)
+        features, data_transformation_params,\
+            data_mapping = self.data_preparation(auto)
+        if kwargs.get('auto_dataprep', False):
+            models_info = None
+            leaderboard = None
+            target_count = None
+            return (models_info, leaderboard,
+                    target_count, target_label,
+                    data_transformation_params, data_mapping)
         # Calculating max_runtime_secs for model training by,
         # subtracting the time taken for feature engineering and data preparation
@@ -1998,12 +2194,14 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
                                 custom_data = self.custom_data,
                                 **kwargs)
         models_info, leaderboard, target_count = self.model_training(auto = auto,
-                                                        max_runtime_secs = max_runtime_secs,
-                                                        stopping_metric = stopping_metric,
-                                                        stopping_tolerance = stopping_tolerance,
-                                                        max_models = max_models)
+                                                                     max_runtime_secs = max_runtime_secs,
+                                                                     stopping_metric = stopping_metric,
+                                                                     stopping_tolerance = stopping_tolerance,
+                                                                     max_models = max_models)
-        return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
+        return (models_info, leaderboard,
+                target_count, target_label,
+                data_transformation_params, data_mapping)
 class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):
@@ -2036,7 +2234,7 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
         self.custom_data = custom_data
     def _classification(self,
-                        model_list = None,
+                        model_list=None,
                         auto = False,
                         verbose = 0,
                         max_runtime_secs = None,
@@ -2103,18 +2301,26 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
                 session.
                 Default Value: False
                 Types: bool
+            seed:
+                Optional Argument.
+                Specifies the random seed for reproducibility.
+                Default Value: 42
+                Types: int
         RETURNS:
             a tuple containing model information, leaderboard, target count, target label, data transformation parameters and data mapping.
         """
         # Feature Exploration Phase
         _FeatureExplore.__init__(self,
-                                     data = self.data,
-                                     target_column = self.target_column,
-                                     verbose=verbose)
+                                 data = self.data,
+                                 target_column = self.target_column,
+                                 verbose=verbose,
+                                 task_type = "classification")
         if verbose > 0:
-            self._exploration()
+            self._exploration(**kwargs)
         # Feature Engineering Phase
         _FeatureEngineering.__init__(self,
                                      data = self.data,
@@ -2126,7 +2332,9 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
                                      **kwargs)
         # Start time
         start_time = time.time()
-        data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
+        data, excluded_columns, target_label,\
+        data_transformation_params, data_mapping = self.feature_engineering(auto)
         # Data Preparation Phase
         _DataPreparation.__init__(self,
                                   data = self.data,
@@ -2136,8 +2344,19 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
                                   custom_data = self.custom_data,
                                   data_transform_dict = data_transformation_params,
                                   task_type = "Classification",
+                                  data_mapping = data_mapping,
                                   **kwargs)
-        features, data_transformation_params = self.data_preparation(auto)
+        features, data_transformation_params, \
+            data_mapping = self.data_preparation(auto)
+        if kwargs.get('auto_dataprep', False):
+            models_info = None
+            leaderboard = None
+            target_count = None
+            return (models_info, leaderboard,
+                    target_count, target_label,
+                    data_transformation_params, data_mapping)
         # Calculating max_runtime_secs for model training by,
         # subtracting the time taken for feature engineering and data preparation
@@ -2159,28 +2378,14 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
                                 custom_data = self.custom_data,
                                 **kwargs)
         models_info, leaderboard, target_count = self.model_training(auto = auto,
-                                                        max_runtime_secs = max_runtime_secs,
-                                                        stopping_metric = stopping_metric,
-                                                        stopping_tolerance = stopping_tolerance,
-                                                        max_models = max_models)
+                                                                     max_runtime_secs = max_runtime_secs,
+                                                                     stopping_metric = stopping_metric,
+                                                                     stopping_tolerance = stopping_tolerance,
+                                                                     max_models = max_models)
-        return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
-    def _target_column_details(self):
-        """
-        DESCRIPTION:
-            Internal function displays the target column distribution of Target column/ Response column.
-        """
-        # If data visualization libraries are available
-        if self._check_visualization_libraries() and not _is_terminal():
-            import matplotlib.pyplot as plt
-            import seaborn as sns
-            self._display_msg(msg='\nTarget Column Distribution:',
-                              show_data=True)
-            plt.figure(figsize=(6, 6))
-            # Ploting a histogram for target column
-            sns.countplot(data=self.data.select([self.target_column]).to_pandas(), x=self.target_column)
-            plt.show()
+        return (models_info, leaderboard,
+                target_count, target_label,
+                data_transformation_params, data_mapping)
     def _check_data_imbalance(self,
                               data=None):
@@ -2324,6 +2529,9 @@ class AutoRegressor(AutoML):
         """
         DESCRIPTION:
             AutoRegressor is a special purpose AutoML feature to run regression specific tasks.
+            Note:
+                * configure.temp_object_type="VT" follows sequential execution.
         PARAMETERS:
             include:
@@ -2405,8 +2613,17 @@ class AutoRegressor(AutoML):
                         results are persisted in a table; otherwise,
                         results are garbage collected at the end of the
                         session.
+                        Note:
+                            * User is responsible for cleanup of the persisted tables. The list of persisted
+                              tables in the current session can be viewed using the get_persisted_tables() method.
                         Default Value: False
                         Types: bool
+                    seed:
+                        Optional Argument.
+                        Specifies the random seed for reproducibility.
+                        Default Value: 42
+                        Types: int
         RETURNS:
             Instance of AutoRegressor.
@@ -2555,6 +2772,9 @@ class AutoClassifier(AutoML):
         """
         DESCRIPTION:
             AutoClassifier is a special purpose AutoML feature to run classification specific tasks.
+            Note:
+                * configure.temp_object_type="VT" follows sequential execution.
         PARAMETERS:
             include:
@@ -2636,8 +2856,17 @@ class AutoClassifier(AutoML):
                         results are persisted in a table; otherwise,
                         results are garbage collected at the end of the
                         session.
+                        Note:
+                            * User is responsible for cleanup of the persisted tables. The list of persisted
+                              tables in the current session can be viewed using the get_persisted_tables() method.
                         Default Value: False
                         Types: bool
+                    seed:
+                        Optional Argument.
+                        Specifies the random seed for reproducibility.
+                        Default Value: 42
+                        Types: int
         RETURNS:
             Instance of AutoClassifier.
@@ -2859,4 +3088,4 @@ class AutoClassifier(AutoML):
                                              stopping_tolerance=self.stopping_tolerance,
                                              max_models=self.max_models,
                                              custom_config_file=self.custom_config_file,
-                                             **kwargs)
+                                             **kwargs)

teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

Potentially problematic release.

teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl