teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Note: this release of teradataml has been flagged as potentially problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
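The headline additions in 20.0.0.3, judging from this list, are a second opensourceML backend (teradataml/opensource/_lightgbm.py plus the lightgbm script templates), UDF support in teradataml/dataframe/functions.py, and the new teradataml/store feature-store and vector-store modules. As rough orientation only, and assuming the lightgbm interface mirrors the td_sklearn calling convention shown later in this diff, usage would look something like this (table and column names are placeholders; check the 20.0.0.3 docs for exact argument names):

    from teradataml import td_lightgbm, DataFrame

    df = DataFrame("titanic")
    # Sketch only: mirrors the td_sklearn convention of passing
    # teradataml DataFrames as X and y.
    model = td_lightgbm.LGBMClassifier()
    model.fit(X=df.select(["age", "fare"]), y=df.select(["survived"]))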
teradataml/opensource/sklearn/_sklearn_wrapper.py:

@@ -19,7 +19,6 @@ from collections import OrderedDict, defaultdict
 from importlib import import_module

 import base64
-import functools
 import json
 import numpy
 import os
@@ -28,7 +27,7 @@ import time
 import inspect
 import warnings
 import json
-import
+import math
 import pandas as pd
 from teradatasqlalchemy import BLOB, CLOB, FLOAT, TIMESTAMP, VARCHAR, INTEGER
 import pandas.api.types as pt
@@ -41,19 +40,18 @@ from teradataml.context.context import _get_current_databasename, get_connection
 from teradataml.dbutils.filemgr import install_file, remove_file
 from teradataml.utils.utils import execute_sql
 from teradataml.options.configure import configure
-from teradataml.opensource.
+from teradataml.opensource._wrapper_utils import _validate_fit_run, _generate_new_name,\
     _validate_opensource_func_args, _derive_df_and_required_columns, _validate_df_query_type
-from teradataml.opensource.
+from teradataml.opensource.constants import OpenSourcePackage, _OSML_MODELS_PRIMARY_INDEX,\
     _OSML_MODELS_TABLE_NAME, _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT, OpensourceModels,\
     _OSML_ADDITIONAL_COLUMN_TYPES
 from teradataml.common.messagecodes import MessageCodes
 from teradataml.common.messages import Messages
 from teradataml.catalog.byom import save_byom, retrieve_byom, delete_byom
-from teradataml.dbutils.dbutils import _create_table
+from teradataml.dbutils.dbutils import _create_table, set_session_param
 from teradataml.utils.validators import _Validators
 from teradataml.dataframe.dataframe import DataFrame
 from teradataml.dataframe.dataframe_utils import DataFrameUtils
-from teradataml.scriptmgmt.lls_utils import create_env, get_env
 from teradataml.common.garbagecollector import GarbageCollector
 from teradataml.common.constants import TeradataConstants

@@ -64,8 +62,15 @@ validator = _Validators()

 installed_model_files = defaultdict(int)

+## Flag to ensure the sklearn script
+## installation occurs only once.
+_file_installed = False
+
 class _GenericObjectWrapper:
     def __init__(self) -> None:
+        if not get_connection():
+            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_CONTEXT_CONNECTION),
+                                      MessageCodes.INVALID_CONTEXT_CONNECTION)
         self._db_name = _get_current_databasename()

         self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "sklearn")
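The new guard at the top of _GenericObjectWrapper.__init__ means an active Vantage connection must exist before any opensourceML wrapper is constructed. A minimal session-setup sketch (host and credentials are placeholders, not from this diff):

    from teradataml import create_context, td_sklearn

    # Establish the context first; constructing a wrapper without it
    # now raises INVALID_CONTEXT_CONNECTION.
    create_context(host="<host>", username="<user>", password="<password>")
    model = td_sklearn.LinearRegression()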
@@ -86,43 +91,24 @@ class _GenericObjectWrapper:
             if configure.openml_user_env is not None:
                 self._env = configure.openml_user_env
             else:
-                self._create_or_get_env()
+                self._env = UtilFuncs._create_or_get_env("open_source_ml.json")
         else:
-
-
-    def _create_or_get_env(self):
-        """
-        Internal function to return the env if already exists else
-        creates the environment using template file and return the env.
-        """
-        # Get the template file path.
-        template_dir_path = os.path.join(_TDML_DIRECTORY, "data", "templates",
-                                         "open_source_ml.json")
+            set_session_param("searchuifdbpath",self._db_name)

-
-
-
+        global _file_installed
+        ## Flag to check whether trained model is installed or not.
+        self._is_trained_model_installed = False

-
-
+        ## Install all sklearn script files on Vantage.
+        if not _file_installed:
+            sklearn_script_files = ["sklearn_fit.py", "sklearn_score.py",
+                                    "sklearn_transform.py", "sklearn_fit_predict.py",
+                                    "sklearn_neighbors.py", "sklearn_model_selection_split.py"]
+            for script_file in sklearn_script_files:
+                self._install_script_file(file_identifier=script_file.split(".")[0],
+                                          file_name=script_file)

-
-            # Call function to 'openml_env' get env.
-            self._env = get_env(_env_name)
-        except TeradataMlException as tdml_e:
-            # We will get here when error says, env does not exist otherwise raise the exception as is.
-            # Env does not exist so create one.
-
-            exc_msg = "Failed to execute get_env(). User environment '{}' not " \
-                      "found.".format(_env_name)
-            if exc_msg in tdml_e.args[0]:
-                print(f"No OpenAF environment with name '{_env_name}' found. Creating one with "\
-                      "latest supported python and required packages.")
-                _env = create_env(template=template_dir_path)
-            else:
-                raise tdml_e
-        except Exception as exc:
-            raise exc
+            _file_installed = True

     def _get_columns_as_list(self, cols):
         """
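The module-level _file_installed flag makes the sklearn script installation run once per Python process rather than once per wrapper instance. The same idiom in isolation (a standalone sketch, names hypothetical):

    _assets_installed = False  # module-level, shared by all instances

    class Runner:
        def __init__(self):
            global _assets_installed
            if not _assets_installed:
                for asset in ("fit.py", "score.py"):
                    self._install(asset)  # runs only for the first instance
                _assets_installed = True

        def _install(self, name):
            print(f"installing {name}")

    Runner(); Runner()  # the second construction skips installation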
@@ -205,13 +191,32 @@ class _GenericObjectWrapper:
                                   is_binary=is_binary)
         else:
             status = self._env.install_file(file_path=new_script,
-
-
+                                            replace=True,
+                                            suppress_output=True)
         if not status:
             raise TeradataMlException(
                 f"Script file '{file_name}' failed to get installed/replaced in Vantage."
             )

+    def _remove_script_file(self, file_name):
+        """
+        Internal function to remove script file in Vantage.
+        """
+        # _env is set while object creation
+        # If not set, it is Vantage Enterprise. Otherwise, it is Vantage Lake.
+
+        if not self._is_lake_system:
+            status = remove_file(file_identifier=file_name.split(".")[0],
+                                 force_remove=True,
+                                 suppress_output=True)
+        else:
+            status = self._env.remove_file(file_name=file_name,
+                                           suppress_output=True)
+        if not status:
+            raise TeradataMlException(
+                f"Script file '{file_name}' failed to remove in Vantage."
+            )
+
     def _get_data_col_types_and_partition_col_indices_and_types(self, data, partition_columns,
                                                                 idx_delim=",",
                                                                 types_delim="--"):
@@ -261,7 +266,7 @@ class _GenericObjectWrapper:
             args_str += f" {strr}"
         return args_str

-    def
+    def _extract_model_objs(self, n_unique_partitions=1, n_partition_cols=1):
         """
         Internal function to extract sklearn object from the model(s) depending on the number of
         partitions. When it is only one model, it is directly used as sklearn object (modelObj).
@@ -294,33 +299,130 @@ class _GenericObjectWrapper:

         warnings.filterwarnings("default")

+    def _validate_existence_of_partition_columns(self, partition_columns, all_columns, arg_names_for_dfs):
+        """
+        Validate if columns in "partition_columns" argument are present in any of the given
+        dataframes.
+        """
+        invalid_part_cols = [c for c in partition_columns if c not in all_columns]

-
-
-
+        if invalid_part_cols:
+            raise ValueError(Messages.get_message(MessageCodes.INVALID_PARTITIONING_COLS,
+                                                  ", ".join(invalid_part_cols),
+                                                  "', '".join(arg_names_for_dfs))
+                             )

-    def
-
-
-
-
+    def _prepare_data_args_string(self, kwargs):
+        """
+        Get column indices and types of each data related arguments in the format:
+        "{<arg_name>-<comma separated indices>-<comma separated types>}--
+         {<arg_name>-<comma separated indices>-<comma separated types>}"
+        """
+        data_args_str = []
+        for arg_name in list(self._data_args.keys()):
+            # Remove DataFrame arguments from kwargs, which will be passed to Script.
+            kwargs.pop(arg_name)

-
-
+            # Get column indices and their types for each dataframe from parent dataframe.
+            _, partition_indices_str, partition_types_str, _ = \
+                self._get_data_col_types_and_partition_col_indices_and_types(self._tdml_df,
+                                                                             self._data_args[arg_name].columns,
+                                                                             idx_delim=",",
+                                                                             types_delim=",")
+
+            # Format "<arg_name>-<comma separated indices>-<comma separated types>"
+            data_args_str.append(f"{arg_name}-{partition_indices_str}-{partition_types_str}")
+
+        # Format "{<arg_name>-<comma separated indices>-<comma separated types>}--
+        # {<arg_name>-<comma separated indices>-<comma separated types>}"
+        return "--".join(data_args_str)

-
+    def _prepare_and_install_file(self, replace_dict):
+        """
+        Prepare function script file from template file and install it in Vantage.
+        Takes the dictionary with keys as strings to be replaced in script and values as
+        strings which should be added in place of keys.
+        """

-        self.
-
-
-
+        with open(os.path.join(self._scripts_path, self._template_file)) as fp:
+            script_data = fp.read()
+
+        for old, new in replace_dict.items():
+            script_data = script_data.replace(old, new)

-        self.
-        self._table_name_prefix = None
+        self._script_file_local = os.path.join(self._tdml_tmp_dir, self._script_file_name)

-        self.
-
-
+        with open(self._script_file_local, "w") as fp:
+            fp.write(script_data)
+
+        self._install_script_file(file_identifier=self._script_file_name.split(".")[0],
+                                  file_name=self._script_file_name,
+                                  file_location=self._tdml_tmp_dir)
+
+    def _get_dataframe_related_args_and_their_columns(self, kwargs):
+        """
+        Get dataframe related arguments and return all their column names from kwargs.
+        """
+        __data_columns = []
+        __data_args_dict = OrderedDict()
+
+        # Separate dataframe related arguments and their column names from actual kwargs.
+        for k, v in kwargs.items():
+            if isinstance(v, DataFrame):
+                # All dataframes should be select of parent dataframe.
+                _validate_df_query_type(v, "select", k)
+
+                # Save all columns in dataframe related arguments.
+                __data_columns.extend(v.columns)
+
+                __data_args_dict[k] = v
+
+        return __data_args_dict, __data_columns
+
+    def _process_data_for_funcs_returning_objects(self, kwargs):
+        """
+        Internal function to process all arguments and assign self._data_args, self._tdml_df
+        and return
+        1. dictionary of elements (needed to replace in the script template file)
+        2. partition columns list.
+        """
+        partition_cols = self._get_columns_as_list(kwargs.get("partition_columns", None))
+        if partition_cols:
+            kwargs.pop("partition_columns")
+
+        self._data_args, __data_columns = self._get_dataframe_related_args_and_their_columns(kwargs)
+
+        arg_names_for_dfs = list(self._data_args.keys())
+
+        # Get common parent dataframe from all dataframes.
+        self._tdml_df = DataFrameUtils()._get_common_parent_df_from_dataframes(list(self._data_args.values()))
+
+        self._tdml_df = self._tdml_df.select(__data_columns + partition_cols)
+
+        self._validate_existence_of_partition_columns(partition_cols, self._tdml_df.columns, arg_names_for_dfs)
+
+        self._tdml_df, partition_cols = self._get_data_and_data_partition_columns(self._tdml_df,
+                                                                                  __data_columns,
+                                                                                  [],
+                                                                                  partition_cols
+                                                                                  )
+
+        # Prepare string of data arguments with name, indices where columns of that argument resides
+        # and types of each of the column.
+        data_args_str = self._prepare_data_args_string(kwargs)
+
+        # Get indices of partition_columns and types of all columns.
+        data_column_types_str, partition_indices_str, _, partition_cols = \
+            self._get_data_col_types_and_partition_col_indices_and_types(self._tdml_df,
+                                                                         partition_cols,
+                                                                         types_delim=None,
+                                                                         idx_delim=None)
+
+        replace_dict = {"<partition_cols_indices>": str(partition_indices_str),
+                        "<types_of_data_cols>": str(data_column_types_str),
+                        "<data_args_info_str>": f"'{data_args_str}'"}
+
+        return replace_dict, partition_cols

     def _validate_equality_of_partition_values(self, fit_values, trans_values):
         """
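_prepare_data_args_string packs, for every DataFrame argument, its column indices and column types into one delimited string that the generated script can later unpack. A standalone sketch of encoding and decoding that format (argument names and types are illustrative):

    def encode(data_args):
        # data_args: {arg_name: (indices, types)}
        return "--".join(
            f"{name}-{','.join(map(str, idx))}-{','.join(types)}"
            for name, (idx, types) in data_args.items()
        )

    def decode(s):
        out = {}
        for part in s.split("--"):
            name, idx, types = part.split("-")
            out[name] = ([int(i) for i in idx.split(",")], types.split(","))
        return out

    encoded = encode({"X": ([0, 1], ["float", "float"]), "y": ([2], ["int"])})
    # 'X-0,1-float,float--y-2-int'
    assert decode(encoded)["y"] == ([2], ["int"])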
@@ -335,294 +437,139 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):

         return True

-    def
+    def _get_non_data_related_args_from_kwargs(self, kwargs):
         """
-
-        and predict() are same.
+        Get all non-data related arguments from kwargs.
         """
-
-
-
-
-
-
-                                        else self._fit_partition_unique_values, key=lambda x: tuple(x))
-        default_unique_values = [[self._default_data_partition_value]]
-
-        if fit_unique_values == default_unique_values and \
-                trans_unique_values != default_unique_values:
-            error_msg = Messages.get_message(MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT,
-                                             "without", "with")
-            msg_code = MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT
-            raise TeradataMlException(error_msg, msg_code)
+        non_data_related_args = {}
+        for k, v in kwargs.items():
+            if not isinstance(v, DataFrame):
+                non_data_related_args[k] = v
+        non_data_related_args.pop("partition_columns", None)
+        return non_data_related_args

-
-
-
-
-
+    def _read_from_template_and_write_dict_to_file(self, template_file, replace_dict,
+                                                   output_script_file_name=None):
+        """
+        Read template file, replace the keys with values and write to new file.
+        """
+        with open(os.path.join(self._scripts_path, template_file)) as fp:
+            script_data = fp.read()
+
+        for old, new in replace_dict.items():
+            script_data = script_data.replace(old, new)

-
-
+        if output_script_file_name is None:
+            output_script_file_name = self._script_file_name
+        file_path = os.path.join(self._tdml_tmp_dir, output_script_file_name)
+        with open(file_path, "w") as fp:
+            fp.write(script_data)

-    def
+    def _generate_script_file_from_template_file(self, kwargs, template_file, func_name,
+                                                 output_script_file_name=None):
         """
-        Internal function to
-
+        Internal function to generate script file from template file. It just adds the non-data
+        related arguments to the template file and writes the contents to new file, so that these
+        arguments are available in the script file for running this function "func_name".
         """
-        #
-
-        def __sklearn_method_invoker_for_multimodel(*c, **kwargs):
-            multi_models = self.modelObj.copy()
-            for i in range(multi_models.shape[0]):
-                curr_model = multi_models.iloc[i]["model"]
-                multi_models.at[i, "model"] = getattr(curr_model, name)(*c, **kwargs)
-            return multi_models.rename(columns={"model": name})
+        # Take out all non-data related arguments to write to template file.
+        non_data_related_args = self._get_non_data_related_args_from_kwargs(kwargs)

-        #
-
-
-
-        is_attr_callable = callable(getattr(self.modelObj.iloc[0]["model"], name))
+        # Read template file and write the contents to new file with non-data related arguments.
+        template_f = os.path.join(self._scripts_path, template_file)
+        with open(template_f, "r") as f:
+            template = f.read()

-
-
-
-
+        if output_script_file_name is None:
+            output_script_file_name = self._script_file_name
+        file_path = os.path.join(self._tdml_tmp_dir, output_script_file_name)
+        with open(file_path, "w") as f:
+            f.write("import json\n")
+            f.write(f"params = json.loads('{json.dumps(non_data_related_args)}')\n")
+            f.write(template)

-
-
-            model = output_attributes.iloc[i]["model"]
-            output_attributes.at[i, "model"] = getattr(model, name)
-        return output_attributes.rename(columns={"model": name})
+        kwargs["file_name"] = output_script_file_name
+        kwargs["name"] = func_name

-    def
-
-
-
-
-
+    def _remove_data_related_args_from_kwargs(self, kwargs):
+        """
+        Internal function to remove data related arguments from kwargs.
+        """
+        kwargs.pop("data", None)
+        kwargs.pop("feature_columns", None)
+        kwargs.pop("group_columns", None)
+        kwargs.pop("partition_columns", None)
+        kwargs.pop("label_columns", None)

-
-
-
-
+    def _convert_pos_args_to_kwargs_for_function(self, pos_args, kwargs, func_name):
+        """
+        Internal function to convert positional arguments to keyword arguments.
+        """
+        fn = getattr(getattr(import_module(self.module_name), self.class_name), func_name)
+        kwargs.update(zip(fn.__code__.co_varnames[1:], pos_args))

-
-    def _validate_model_supportability(cls, model):
+    def _install_model_and_script_files(self, file_name, file_location):
         """
-        Internal function to
-        teradataml's opensourceML.
+        Internal function to install model and script files to Vantage.
         """
-
-
-
-
-
-            # TODO: check for other supported packages.
-            if model.__module__.split(".")[0] not in OpenSourcePackage.values():
-                raise TeradataMlException(error_msg, msg_code)
-        except Exception as ex:
-            # If in case, model.__module__ fails.
-            raise TeradataMlException(error_msg, msg_code) from ex
+        self._install_initial_model_file()
+        self._install_script_file(file_identifier=file_name.split(".")[0],
+                                  file_name=file_name,
+                                  is_binary=False,
+                                  file_location=file_location)

-    def
+    def _assign_fit_variables_after_execution(self, data, partition_columns, label_columns):
         """
-        Internal function to
-        "model_file_path_local" to Vantage using BYOM methods save_byom() and delete_byom() based
-        on the value of "replace_if_exists" argument.
+        Internal function to assign fit related variables.
         """
-        #
-
-
-                                                    table_name=_OSML_MODELS_TABLE_NAME,
-                                                    schema=self._db_name)
-        if not osml_models_table_exists:
-            all_columns = _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT.copy()
-            all_columns.update(_OSML_ADDITIONAL_COLUMN_TYPES)
-            _create_table(table_name=_OSML_MODELS_TABLE_NAME, columns=all_columns,
-                          primary_index=_OSML_MODELS_PRIMARY_INDEX, schema_name=self._db_name)
+        # Extract sklearn object(s) from the depending on the number of unique partitioning values.
+        self._extract_model_objs(n_unique_partitions=len(self._fit_partition_unique_values),
+                                 n_partition_cols=len(partition_columns))

-
-
-
-                  model=self.modelObj,
-                  pos_args=self.pos_args,
-                  key_args=self.kwargs)
+        # Need this label columns types in prediction.
+        self._fit_label_columns_types = []
+        self._fit_label_columns_python_types = []

-
-
-
+        for l_c in label_columns:
+            column_data = data._td_column_names_and_sqlalchemy_types[l_c.lower()]
+            self._fit_label_columns_types.append(column_data)
+            self._fit_label_columns_python_types.append(column_data.python_type.__name__)

-
-
-
-
-
-
-        except TeradataMlException as ex:
-            model_exists_msg = Messages.get_message(MessageCodes.MODEL_ALREADY_EXISTS, model_name)
-            if not replace_if_exists and model_exists_msg == str(ex):
-                raise
-            elif replace_if_exists and model_exists_msg == str(ex):
-                # Delete the model from Model table and save again.
-                delete_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME)
-                save_byom(model_id=model_name,
-                          model_file=file_name,
-                          table_name=_OSML_MODELS_TABLE_NAME,
-                          additional_columns_types=_OSML_ADDITIONAL_COLUMN_TYPES,
-                          additional_columns={"package": self.OPENSOURCE_PACKAGE_NAME.value})
-            else:
-                raise
-        finally:
-            os.remove(file_name)
+        # If the model is trained a second time after the object creation,
+        # or if set_params() is called after the first model training,
+        # this flag will reset to False. So that for subsequent predict/score
+        # operations, the newly trained model will be installed.
+        if self._is_trained_model_installed:
+            self._is_trained_model_installed = False

-    @classmethod
-    def _deploy(cls, model_name, model, replace_if_exists=False):
-        """
-        Internal function to create an instance of the class using the model and deploy
-        the model to Vantage.
-        """
-        cls._validate_model_supportability(model=model)

-
-
-
-        cls._install_initial_model_file()
+class _OpenSourceObjectWrapper(_GenericObjectWrapper):
+    # This has to be set for every package which subclasses this class.
+    OPENSOURCE_PACKAGE_NAME = None

-
-
-
-
-
-    def _load(cls, model_name):
-        """
-        Internal function to load model corresponding to the package (like sklearn etc)
-        from Vantage to client using retrieve_byom() and create an instance of the class if
-        the model is from the same package.
-        """
-        try:
-            model = retrieve_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME,
-                                  return_addition_columns=True)
-        except TeradataMlException as ex:
-            # Not showing table name in error message as it is an internal table.
-            part_msg = f"Model '{model_name}' not found in the table "
-            if part_msg in str(ex):
-                raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name, ""),
-                                          MessageCodes.MODEL_NOT_FOUND)
-            raise
-
-        model_vals_list = model.get_values()[0]
-        # List of 3 elements -
-        # - model name as index column,
-        # - 1st contains model object with fields: is_default_partition_value, partition_file_prefix, model. etc
-        # - 2nd contains package name.
-        model_obj = pickle.loads(model_vals_list[0])
-        model = model_obj.model
-        package = model_vals_list[1]
-
-        if package != cls.OPENSOURCE_PACKAGE_NAME.value:
-            # Raise error if trying to access model of different package.
-            raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name,
-                                                           f". Requested model is from '{package}' package"),
-                                      MessageCodes.MODEL_NOT_FOUND)
-
-        if isinstance(model, pd.DataFrame):
-            # Create a new instance of the class and set the model object to the instance.
-            # Instantiation can take only model, not model object. Hence, passing one of the model
-            # from pandas df. Updating modelObj and other fields later
-            cls = cls(model=model.iloc[1,2])
-            cls.modelObj = model
-            cls._fit_partition_unique_values = [lst[:len(lst)-1] for lst in model.values.tolist()]
-        else:
-            cls = cls(model=model)
-
-        cls._model_file_name_prefix = model_obj.partition_file_prefix
-        cls._is_default_partition_value_fit = model_obj.is_default_partition_value
-        cls._fit_partition_colums_non_default = model_obj.fit_partition_columns_non_default
-        cls.pos_args = model_obj.pos_args
-        cls.kwargs = model_obj.key_args
-
-        # Load the model file into Vantage node as file can be used in
-        # predict or other operations.
-        cls._install_initial_model_file()
-
-        return cls
-
-    def deploy(self, model_name, replace_if_exists=False):
-        """
-        DESCRIPTION:
-            Deploys the model held by interface object to Vantage.
-
-        PARAMETERS:
-            model_name:
-                Required Argument.
-                Specifies the unique name of the model to be deployed.
-                Types: str
-
-            replace_if_exists:
-                Optional Argument.
-                Specifies whether to replace the model if a model with the same name already
-                exists in Vantage. If this argument is set to False and a model with the same
-                name already exists, then the function raises an exception.
-                Default Value: False
-                Types: bool
-
-        RETURNS:
-            The opensource object wrapper.
-
-        RAISES:
-            TeradataMLException if model with "model_name" already exists and the argument
-            "replace_if_exists" is set to False.
-
-        EXAMPLES:
-            >>> from teradataml import td_sklearn
-            >>> model = td_sklearn.LinearRegression(normalize=True)
-            >>> model
-            LinearRegression(normalize=True)
-
-            # Example 1: Deploy the model held by interface object to Vantage.
-            >>> lin_reg = model.deploy("linreg_model_ver_2")
-            Model is saved.
-            >>> lin_reg
-            LinearRegression(normalize=True)
-
-            # Example 2: Deploy the model held by interface object to Vantage with the name same
-            # as that of model that already existed in Vantage.
-            >>> lin_reg = model.deploy("linreg_model_ver_2", replace_if_exists=True)
-            Model is deleted.
-            Model is saved.
-            >>> lin_reg
-            LinearRegression(normalize=True)
-        """
-
-        # Install model file into Vantage, if not installed.
-        self._install_initial_model_file()
-
-        self._save_model(model_name, replace_if_exists)
-        return self
+    def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
+        if model is None and not module_name and not class_name:
+            raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, "model",
+                                                           "module_name and class_name"),
+                                      MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)

+        validator._validate_mutually_inclusive_arguments(module_name, "module_name",
+                                                         class_name, "class_name")

-
+        super().__init__()

-
+        self.module_name = module_name
+        self.class_name = class_name
+        self.kwargs = kwargs if kwargs is not None else {}
+        self.pos_args = pos_args if pos_args is not None else tuple()

-
-
-
+        self._fit_label_columns_types = None
+        self._fit_label_columns_python_types = None
+        self._table_name_prefix = None

-        self.
-
-
-            self.module_name = model.__module__.split("._")[0]
-            self.class_name = model.__class__.__name__
-            # __dict__ gets all the arguments as dictionary including default ones and positional
-            # args.
-            self.kwargs = model.__dict__
-            self.pos_args = tuple() # Kept empty as all are moved to kwargs.
-        else:
-            self._initialize_object()
+        self._is_default_partition_value_fit = True # False when the user provides partition columns.
+        self._fit_partition_colums_non_default = None
+        self._is_default_partition_value_predict = True # False when the user provides partition columns.

     def __repr__(self):
         if self._is_default_partition_value_fit:
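The new _convert_pos_args_to_kwargs_for_function above resolves the wrapped class from its dotted module path and maps positional arguments onto parameter names via __code__.co_varnames. The same mechanism in isolation (a sketch assuming scikit-learn is installed locally):

    from importlib import import_module

    def pos_to_kwargs(module_name, class_name, func_name, pos_args, kwargs):
        # Resolve the function, then zip positional args onto its parameter
        # names; co_varnames[0] is "self", so it is skipped.
        fn = getattr(getattr(import_module(module_name), class_name), func_name)
        kwargs.update(zip(fn.__code__.co_varnames[1:], pos_args))
        return kwargs

    print(pos_to_kwargs("sklearn.linear_model", "LinearRegression",
                        "fit", ("features", "labels"), {}))
    # {'X': 'features', 'y': 'labels'}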
@@ -636,19 +583,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         pd.reset_option("display.max_colwidth")
         return opt

-    def _validate_args_and_get_data(self, X=None, y=None, groups=None, kwargs={},
-                                    skip_either_or_that=False):
-        """
-        Internal function to validate arguments passed to exposed opensource APIs and return
-        parent DataFrame, feature columns, label columns, group columns, data partition columns.
-        """
-        _validate_opensource_func_args(X=X, y=y, groups=groups,
-                                       fit_partition_cols=self._fit_partition_colums_non_default,
-                                       kwargs=kwargs,
-                                       skip_either_or_that=skip_either_or_that)
-        return _derive_df_and_required_columns(X=X, y=y, groups=groups, kwargs=kwargs,
-                                               fit_partition_cols=self._fit_partition_colums_non_default)
-
     def _initialize_object(self):
         """
         Internal function to initialize sklearn object from module name and class name.
@@ -657,6 +591,13 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         imported_args = {}
         # If there are any objects of class `_SkLearnObjectWrapper`, it is modified to
         # corresponding sklearn object.
+        _partition_column_names = None
+        if "partition_columns" in self.kwargs:
+            self._fit_partition_colums_non_default = self.kwargs["partition_columns"]
+            self._is_default_partition_value_fit = False
+            _partition_column_names = self._fit_partition_colums_non_default
+
+
         new_sklearn_pos_args = self.modify_args(None, self.pos_args, imported_args)
         new_sklearn_kwargs = self.modify_args(None, self.kwargs, imported_args)

@@ -681,19 +622,33 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                 # TODO: ELE-6351: Skipping adding functions and generators to kwargs as these
                 # are not supported yet due to pickling issue.
                 continue
-            if
-
+                if self.get_params():
+                    if k in self.get_params():
+                        self.kwargs[k] = v
+                else:
+                    _model_init_arguments = None
+                    try:
+                        _model_init_arguments = self.modelObj.__init__.__code__.co_varnames
+                    except AttributeError:
+                        pass
+                    if _model_init_arguments:
+                        self.kwargs = dict((k, v) for k, v in _arguments.items() if k in _model_init_arguments)
+                    else:
+                        self.kwargs = _arguments
         else:
             # Model selection classes will not have `get_params`, in which case modelObj's __dict__
             # is saved as kwargs.
             self.kwargs = _arguments

-
+        if _partition_column_names:
+            self.kwargs["partition_columns"] = _partition_column_names
+
+    def _initialize_variables(self, table_name_prefix):
         """
         Internal function to initialize variables used in this class.
         """
         self.feature_names_in_ = None
-        self._table_name_prefix =
+        self._table_name_prefix = table_name_prefix
         self._model_file_name_prefix = _generate_new_name(type="file")
         self.model_file_paths_local = set()

|
|
|
710
665
|
self._is_model_installed = False
|
|
711
666
|
self._fit_partition_unique_values = [[self._default_data_partition_value]]
|
|
712
667
|
|
|
668
|
+
def _get_returning_df(self, script_df, partition_column, returns):
|
|
669
|
+
"""
|
|
670
|
+
Internal function to return the teradataml Dataframe except
|
|
671
|
+
partition_column.
|
|
672
|
+
"""
|
|
673
|
+
if self._is_default_partition_value_fit:
|
|
674
|
+
# For single model case, partition column is internally generated
|
|
675
|
+
# and no point in returning it to the user.
|
|
676
|
+
|
|
677
|
+
# Extract columns from return types.
|
|
678
|
+
returning_cols = [col[0] for col in returns[len(partition_column):]]
|
|
679
|
+
return script_df.select(returning_cols)
|
|
680
|
+
return script_df
|
|
681
|
+
|
|
713
682
|
def modify_args(self, fp1, arg, imported_args):
|
|
714
683
|
"""
|
|
715
684
|
Internal function to recursively (if "arg" is list/tuple/dict) check if any sklearn object
|
|
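_get_returning_df hides the internally generated partition column in the single-model case by selecting only the declared return columns. The slicing step in isolation (plain Python, names illustrative):

    returns = [("part_id", "INTEGER"), ("prediction", "FLOAT"), ("prob", "FLOAT")]
    partition_column = ["part_id"]

    # Skip the leading partition column(s); keep only user-facing columns.
    returning_cols = [col[0] for col in returns[len(partition_column):]]
    print(returning_cols)  # ['prediction', 'prob']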
@@ -752,61 +721,480 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
|
|
|
752
721
|
self.modify_args(fp1, k, imported_args),
|
|
753
722
|
self.modify_args(fp1, v, imported_args),
|
|
754
723
|
)
|
|
755
|
-
for k, v in arg.items()
|
|
724
|
+
for k, v in arg.items() if k != "partition_columns"
|
|
756
725
|
)
|
|
726
|
+
# elif arg == "partition_columns":
|
|
727
|
+
|
|
757
728
|
else:
|
|
758
729
|
return arg
|
|
759
730
|
|
|
760
|
-
def _install_initial_model_file(self):
|
|
761
|
-
"""
|
|
762
|
-
If model file(s) is/are not installed in Vantage, then install it/them.
|
|
731
|
+
def _install_initial_model_file(self, use_dummy_initial_file=False):
|
|
732
|
+
"""
|
|
733
|
+
If model file(s) is/are not installed in Vantage, then install it/them.
|
|
734
|
+
"""
|
|
735
|
+
if isinstance(self.modelObj, pd.DataFrame):
|
|
736
|
+
# Get list of unique partition values and corresponding model object as dict.
|
|
737
|
+
partition_values_model_dict = {}
|
|
738
|
+
obj_list = self.modelObj.values.tolist()
|
|
739
|
+
for lst in obj_list:
|
|
740
|
+
partition_values_model_dict[tuple(lst[:len(self._fit_partition_colums_non_default)])] = \
|
|
741
|
+
lst[len(self._fit_partition_colums_non_default)]
|
|
742
|
+
|
|
743
|
+
for partition in self._fit_partition_unique_values:
|
|
744
|
+
# Create a new file with file name with partition values and
|
|
745
|
+
# dump sklearn object into it. Finally install the file to Vantage.
|
|
746
|
+
partition_join = "_".join([str(x) for x in partition])
|
|
747
|
+
file_name = f"{self._model_file_name_prefix}_{partition_join}"
|
|
748
|
+
# Replace '-' with '_' as '-' can't be present in file identifier.
|
|
749
|
+
# Needed this replace because partition_columns can be negative.
|
|
750
|
+
file_name = file_name.replace("-", "_")
|
|
751
|
+
full_file_name = os.path.join(self._tdml_tmp_dir, file_name)
|
|
752
|
+
with open(full_file_name, "wb+") as fp:
|
|
753
|
+
# Write sklearn object to file.
|
|
754
|
+
if isinstance(self.modelObj, pd.DataFrame):
|
|
755
|
+
# If multiple models, then write the model corresponding to the partition value.
|
|
756
|
+
fp.write(pickle.dumps(partition_values_model_dict[tuple(partition)]))
|
|
757
|
+
else:
|
|
758
|
+
if use_dummy_initial_file:
|
|
759
|
+
fp.write(pickle.dumps("abc"))
|
|
760
|
+
else:
|
|
761
|
+
fp.write(pickle.dumps(self.modelObj))
|
|
762
|
+
self.model_file_paths_local.add(file_name)
|
|
763
|
+
|
|
764
|
+
self._install_script_file(file_identifier=file_name,
|
|
765
|
+
file_name=file_name,
|
|
766
|
+
is_binary=True,
|
|
767
|
+
file_location=self._tdml_tmp_dir)
|
|
768
|
+
|
|
769
|
+
if self._is_lake_system:
|
|
770
|
+
# Need to pass env_name along with file_name for cleaning up the files in env.
|
|
771
|
+
obj = f"{self._env.env_name}::{file_name}"
|
|
772
|
+
if installed_model_files[obj] == 0:
|
|
773
|
+
# Add to GC for the first time the model file (along with env name) is encountered.
|
|
774
|
+
installed_model_files[obj] = 1
|
|
775
|
+
GarbageCollector._add_to_garbagecollector(object_name=obj,
|
|
776
|
+
object_type=TeradataConstants.TERADATA_APPLY)
|
|
777
|
+
else:
|
|
778
|
+
if installed_model_files[file_name] == 0:
|
|
779
|
+
# Add to GC for the first time the model file is encountered.
|
|
780
|
+
installed_model_files[file_name] = 1
|
|
781
|
+
GarbageCollector._add_to_garbagecollector(object_name=file_name,
|
|
782
|
+
object_type=TeradataConstants.TERADATA_SCRIPT)
|
|
783
|
+
|
|
784
|
+
self._is_model_installed = True
|
|
785
|
+
|
|
786
|
+
def _validate_unique_partition_values(self, data, partition_columns):
|
|
787
|
+
"""
|
|
788
|
+
Internal function to validate if the partition values in partition_columns used in fit()
|
|
789
|
+
and predict() are same.
|
|
790
|
+
"""
|
|
791
|
+
data._index_label = None
|
|
792
|
+
unique_values = data.drop_duplicate(partition_columns).get_values()
|
|
793
|
+
|
|
794
|
+
trans_unique_values = sorted(unique_values.tolist(), key=lambda x: tuple(x))
|
|
795
|
+
fit_unique_values = sorted(self._fit_partition_unique_values.tolist() \
|
|
796
|
+
if not isinstance(self._fit_partition_unique_values, list) \
|
|
797
|
+
else self._fit_partition_unique_values, key=lambda x: tuple(x))
|
|
798
|
+
default_unique_values = [[self._default_data_partition_value]]
|
|
799
|
+
|
|
800
|
+
if fit_unique_values == default_unique_values and \
|
|
801
|
+
trans_unique_values != default_unique_values:
|
|
802
|
+
error_msg = Messages.get_message(MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT,
|
|
803
|
+
"without", "with")
|
|
804
|
+
msg_code = MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT
|
|
805
|
+
raise TeradataMlException(error_msg, msg_code)
|
|
806
|
+
|
|
807
|
+
if not self._validate_equality_of_partition_values(fit_unique_values, trans_unique_values):
|
|
808
|
+
raise TeradataMlException(
|
|
809
|
+
Messages.get_message(MessageCodes.PARTITION_VALUES_NOT_MATCHING, "training", "test"),
|
|
810
|
+
MessageCodes.PARTITION_VALUES_NOT_MATCHING
|
|
811
|
+
)
|
|
812
|
+
|
|
813
|
+
def fit(self, **kwargs):
|
|
814
|
+
pass
|
|
815
|
+
|
|
816
|
+
def _convert_arguments_to_modelObj(self, args, idx_multi_model=None):
|
|
817
|
+
"""
|
|
818
|
+
Internal function to convert all OpensourceML related objects in arguments to
|
|
819
|
+
underlying model objects.
|
|
820
|
+
"""
|
|
821
|
+
if isinstance(args, dict):
|
|
822
|
+
new_args = args.copy() # To avoid updating
|
|
823
|
+
for k, v in new_args.items():
|
|
824
|
+
if isinstance(v, type(self)):
|
|
825
|
+
if idx_multi_model is not None:
|
|
826
|
+
# single model. This argument is set only when modelObj is single model.
|
|
827
|
+
new_args[k] = v.modelObj
|
|
828
|
+
else:
|
|
829
|
+
# multi-model. Get appropriate model from modelObj.
|
|
830
|
+
new_args[k] = v.modelObj.iloc[idx_multi_model]["model"]
|
|
831
|
+
else:
|
|
832
|
+
new_args[k] = v
|
|
833
|
+
return new_args
|
|
834
|
+
|
|
835
|
+
# If args is tuple, convert all elements to underlying model object.
|
|
836
|
+
elif isinstance(args, tuple):
|
|
837
|
+
new_args = tuple()
|
|
838
|
+
for arg in args:
|
|
839
|
+
if isinstance(arg, type(self)):
|
|
840
|
+
if idx_multi_model is None:
|
|
841
|
+
# single model. This argument is set only when modelObj is single model.
|
|
842
|
+
new_args += (arg.modelObj,)
|
|
843
|
+
else:
|
|
844
|
+
# multi-model. Get appropriate model from modelObj.
|
|
845
|
+
new_args += (arg.modelObj.iloc[idx_multi_model]["model"],)
|
|
846
|
+
else:
|
|
847
|
+
new_args += (arg,)
|
|
848
|
+
return new_args
|
|
849
|
+
return args
|
|
850
|
+
|
|
851
|
+
+    def __get_obj_attributes_multi_model(self, name):
+        """
+        Internal function to get attributes of all sklearn model objects when multiple
+        models are generated by fit.
+        """
+
+        def __generate_model_object(model_obj_value, init_model_obj):
+            """
+            Internal function to generate a _SkLearnObjectWrapper model object from
+            model_obj_value.
+            """
+            # Create _SkLearnObjectWrapper object from opensource model object.
+            model_obj = self.__class__(model=init_model_obj)
+
+            model_obj.modelObj = model_obj_value
+            model_obj._is_model_installed = True
+
+            # Setting other model attributes.
+            model_obj._is_default_partition_value_fit = self._is_default_partition_value_fit
+            model_obj._is_default_partition_value_predict = self._is_default_partition_value_predict
+            model_obj._fit_partition_colums_non_default = self._fit_partition_colums_non_default
+            model_obj._fit_partition_unique_values = self._fit_partition_unique_values
+            return model_obj
+
+        # Wrapper function to invoke the dynamic method, using arguments
+        # passed by the user, on the model in each row.
+        def __sklearn_method_invoker_for_multimodel(*c, **kwargs):
+            multi_models = self.modelObj.copy()
+            for i in range(multi_models.shape[0]):
+                curr_model = multi_models.iloc[i]["model"]
+                partition_values = multi_models.iloc[i][0:len(self._fit_partition_colums_non_default)].to_list()
+                partition_values = "_".join([str(x) for x in partition_values])
+                if self.module_name == "lightgbm.basic" and self.class_name == "Booster" and name == "save_model":
+                    # filename is the first argument.
+                    kwargs1 = kwargs.copy()
+                    c1 = c
+
+                    if len(c) > 0:
+                        c1 = list(c1)
+                        c1[0] = f"{c1[0]}_{partition_values}"
+                        c1 = tuple(c1)
+                    if len(kwargs) > 0 and kwargs.get("filename", None):
+                        kwargs1["filename"] = f"{kwargs1['filename']}_{partition_values}"
+
+                    multi_models.at[i, "model"] = getattr(curr_model, name)(*self._convert_arguments_to_modelObj(c1, i),
+                                                                            **self._convert_arguments_to_modelObj(kwargs1, i))
+                else:
+                    multi_models.at[i, "model"] = getattr(curr_model, name)(*self._convert_arguments_to_modelObj(c, i),
+                                                                            **self._convert_arguments_to_modelObj(kwargs, i))
+
+            first_function_value = multi_models.at[0, "model"]
+            if self.__class__._validate_model_supportability(first_function_value):
+                return __generate_model_object(multi_models, init_model_obj=first_function_value)
+
+            multi_models = multi_models.rename(columns={"model": name})
+
+            # Select only partition columns and the attribute column.
+            return multi_models[self._fit_partition_colums_non_default + [name]]
+
+        # Assuming that self.modelObj will have at least 1 row.
+
+        # Get the attribute instance from the first model object.
+        first_attribute_instance = getattr(self.modelObj.iloc[0]["model"], name)
+
+        # If first_attribute_instance is callable, it should be applied on the model in
+        # each row using the passed arguments.
+        if callable(first_attribute_instance):
+            return __sklearn_method_invoker_for_multimodel
+
+        output_attributes = self.modelObj.copy()
+        for i in range(output_attributes.shape[0]):
+            model = output_attributes.iloc[i]["model"]
+            output_attributes.at[i, "model"] = getattr(model, name)
+
+        if self.__class__._validate_model_supportability(first_attribute_instance):
+            return __generate_model_object(output_attributes, init_model_obj=first_attribute_instance)
+
+        return output_attributes.rename(columns={"model": name})
+
+    def __getattr__(self, name):
+        # This just runs attributes (functions and properties) from opensource
+        # (sklearn/lightgbm) objects.
+        def __sklearn_method_invoker(*c, **kwargs):
+            # Opensource model is returned from the function call. Create an
+            # _OpensourceObjectWrapper object.
+            model_obj = attribute_instance(*self._convert_arguments_to_modelObj(c),
+                                           **self._convert_arguments_to_modelObj(kwargs))
+            if self.__class__._validate_model_supportability(model_obj):
+                model_obj = self.__class__(model=model_obj)
+                model_obj._is_model_installed = True  # Trained model is returned by the function call.
+            return model_obj
+
+        if isinstance(self.modelObj, pd.DataFrame):
+            return self.__get_obj_attributes_multi_model(name)
+
+        attribute_instance = getattr(self.modelObj, name)
+
+        if callable(attribute_instance):
+            return __sklearn_method_invoker
+
+        if self.__class__._validate_model_supportability(attribute_instance):
+            # sklearn model is returned from the attribute. Create an _SkLearnObjectWrapper object.
+            model_obj = self.__class__(model=attribute_instance)
+            model_obj._is_model_installed = True  # Trained model is returned as an attribute.
+            return model_obj
+
+        return attribute_instance
+
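The `__getattr__` above relies on a standard Python delegation pattern: the hook fires only for names not found on the wrapper itself, so every unresolved attribute is fetched from the wrapped object, and callables are intercepted so their results can be re-wrapped for chaining. A generic sketch of the pattern (`Delegate` is illustrative, not the actual wrapper class):

    class Delegate:
        def __init__(self, inner):
            self._inner = inner

        def __getattr__(self, name):
            # Only called when normal attribute lookup fails, so names defined on
            # Delegate itself (like _inner) never recurse into this hook.
            attr = getattr(self._inner, name)
            if callable(attr):
                def invoker(*args, **kwargs):
                    result = attr(*args, **kwargs)
                    # Re-wrap results of the delegated type so calls keep chaining.
                    return Delegate(result) if isinstance(result, list) else result
                return invoker
            return attr

    d = Delegate([3, 1, 2])
    print(d.count(1))  # 1 -- the method call is forwarded to the wrapped list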
+    @classmethod
+    def _validate_model_supportability(cls, model):
+        """
+        Internal function to validate whether the model provided for deployment is
+        supported by teradataml's OpensourceML.
+        """
+        error_msg = Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED, "validate",
+                                         "The given model is not a supported opensource model.")
+        msg_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
+        package_name = None
+        class_name = None
+        try:
+            # For scikit-learn, model.__module__ is similar to 'sklearn.linear_model._base'.
+            # TODO: check for other supported packages.
+            if hasattr(model, "__module__"):
+                package_name = model.__module__.split(".")[0]
+                if package_name not in OpenSourcePackage.values():
+                    return False
+            if hasattr(model, "__class__"):
+                class_name = model.__class__.__name__
+        except Exception as ex:
+            # In case accessing model.__module__ fails.
+            raise TeradataMlException(error_msg, msg_code) from ex
+
+        # True only if the package name is an opensource package name and the class is
+        # not an internal class.
+        return bool(package_name and class_name and
+                    package_name == cls.OPENSOURCE_PACKAGE_NAME.value and
+                    not class_name.startswith("_"))
+
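The check above hinges on `__module__` and `__class__` introspection. A quick illustration of what it sees for a scikit-learn estimator (requires scikit-learn installed):

    from sklearn.linear_model import LinearRegression

    model = LinearRegression()
    print(model.__module__)                # 'sklearn.linear_model._base'
    print(model.__module__.split(".")[0])  # 'sklearn' -- the package-name test above
    print(model.__class__.__name__)        # 'LinearRegression'
    # Internal helper classes start with "_" and would be rejected:
    print(model.__class__.__name__.startswith("_"))  # False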
+    def _save_model(self, model_name, replace_if_exists=False):
+        """
+        Internal function to save the model stored in the file at the location mentioned
+        by class variable "model_file_path_local" to Vantage using BYOM methods save_byom()
+        and delete_byom(), based on the value of the "replace_if_exists" argument.
+        """
+        # Create a table, if it doesn't exist, in Vantage to store the model info.
+        conn = get_connection()
+        osml_models_table_exists = conn.dialect.has_table(conn,
+                                                          table_name=_OSML_MODELS_TABLE_NAME,
+                                                          schema=self._db_name,
+                                                          table_only=True)
+        if not osml_models_table_exists:
+            all_columns = _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT.copy()
+            all_columns.update(_OSML_ADDITIONAL_COLUMN_TYPES)
+            _create_table(table_name=_OSML_MODELS_TABLE_NAME, columns=all_columns,
+                          primary_index=_OSML_MODELS_PRIMARY_INDEX, schema_name=self._db_name)
+
+        model_obj = OpensourceModels(is_default_partition_value=self._is_default_partition_value_fit,
+                                     partition_file_prefix=self._model_file_name_prefix,
+                                     fit_partition_columns_non_default=self._fit_partition_colums_non_default,
+                                     model=self.modelObj,
+                                     pos_args=self.pos_args,
+                                     key_args=self.kwargs)
+
+        # Save the model object to a file to be used in save_byom() for writing to the
+        # Vantage table.
+        file_name = os.path.join(self._tdml_tmp_dir, "deployed_file.pickle")
+        with open(file_name, "wb+") as fp:
+            fp.write(pickle.dumps(model_obj))
+
+        try:
+            save_byom(model_id=model_name,
+                      model_file=file_name,
+                      table_name=_OSML_MODELS_TABLE_NAME,
+                      additional_columns_types=_OSML_ADDITIONAL_COLUMN_TYPES,
+                      additional_columns={"package": self.OPENSOURCE_PACKAGE_NAME.value})
+        except TeradataMlException as ex:
+            model_exists_msg = Messages.get_message(MessageCodes.MODEL_ALREADY_EXISTS, model_name)
+            if not replace_if_exists and model_exists_msg == str(ex):
+                raise
+            elif replace_if_exists and model_exists_msg == str(ex):
+                # Delete the model from the models table and save it again.
+                delete_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME)
+                save_byom(model_id=model_name,
+                          model_file=file_name,
+                          table_name=_OSML_MODELS_TABLE_NAME,
+                          additional_columns_types=_OSML_ADDITIONAL_COLUMN_TYPES,
+                          additional_columns={"package": self.OPENSOURCE_PACKAGE_NAME.value})
+            else:
+                raise
+        finally:
+            os.remove(file_name)
+
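The client-side half of `_save_model` is a plain pickle-to-temp-file step; only the `save_byom()` call needs a live Vantage session. A minimal sketch of that serialization flow (the `deployed_file.pickle` name mirrors the method above; the `save_byom` line is left as a comment since it requires a connection):

    import os
    import pickle
    import tempfile

    from sklearn.linear_model import LinearRegression

    model = LinearRegression()
    file_name = os.path.join(tempfile.gettempdir(), "deployed_file.pickle")
    try:
        with open(file_name, "wb+") as fp:
            fp.write(pickle.dumps(model))  # same serialization step as above
        # save_byom(model_id="my_model", model_file=file_name, table_name=...)  # needs Vantage
    finally:
        os.remove(file_name)               # the temp file is always cleaned up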
+    @classmethod
+    def _deploy(cls, model_name, model, replace_if_exists=False):
+        """
+        Internal function to create an instance of the class using the model and deploy
+        the model to Vantage.
+        """
+        is_model_supportable = cls._validate_model_supportability(model=model)
+        if not is_model_supportable:
+            raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED,
+                                                           "deploy", "The given model is not a supported opensource model."),
+                                      MessageCodes.MODEL_CATALOGING_OPERATION_FAILED)
+
+        obj = cls(model=model)
+        # Load the model file into the Vantage node as the file can be used in
+        # predict or other operations.
+        obj._install_initial_model_file()
+
+        obj._save_model(model_name, replace_if_exists)
+
+        return obj
+
+    @classmethod
+    def _load(cls, model_name):
+        """
+        Internal function to load the model corresponding to the package (like sklearn etc.)
+        from Vantage to the client using retrieve_byom(), and create an instance of the
+        class if the model is from the same package.
+        """
+        try:
+            model = retrieve_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME,
+                                  return_addition_columns=True)
+        except TeradataMlException as ex:
+            # Not showing the table name in the error message as it is an internal table.
+            part_msg = f"Model '{model_name}' not found in the table "
+            if part_msg in str(ex):
+                raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name, ""),
+                                          MessageCodes.MODEL_NOT_FOUND)
+            raise
+
+        model_vals_list = model.get_values()[0]
+        # List of elements (the model name is the index column):
+        # - 0th contains the model object with fields: is_default_partition_value,
+        #   partition_file_prefix, model, etc.
+        # - 1st contains the package name.
+        model_obj = pickle.loads(model_vals_list[0])
+        model = model_obj.model
+        package = model_vals_list[1]
+
+        if package != cls.OPENSOURCE_PACKAGE_NAME.value:
+            # Raise an error if trying to access a model of a different package.
+            raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name,
+                                                           f". Requested model is from '{package}' package"),
+                                      MessageCodes.MODEL_NOT_FOUND)
+
+        if isinstance(model, pd.DataFrame):
+            # Create a new instance of the class and set the model object on the instance.
+            # Instantiation can take only a model, not a model object. Hence, passing one of
+            # the models from the pandas df and updating modelObj and other fields later.
+            obj = cls(model=model.iloc[1, 2])
+            obj.modelObj = model
+            obj._fit_partition_unique_values = [lst[:len(lst)-1] for lst in model.values.tolist()]
+        else:
+            obj = cls(model=model)
+
+        obj._model_file_name_prefix = model_obj.partition_file_prefix
+        obj._is_default_partition_value_fit = model_obj.is_default_partition_value
+        obj._fit_partition_colums_non_default = model_obj.fit_partition_columns_non_default
+        obj.pos_args = model_obj.pos_args
+        obj.kwargs = model_obj.key_args
+
+        # Load the model file into the Vantage node as the file can be used in
+        # predict or other operations.
+        obj._install_initial_model_file()
+
+        return obj
+
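`_deploy` and `_load` back the user-facing deploy/load round trip. A hedged usage sketch, assuming a connected Vantage session and that the public `td_sklearn.deploy`/`td_sklearn.load` entry points mirror these internal signatures:

    from sklearn.linear_model import LinearRegression
    from teradataml import td_sklearn

    # Deploy a locally created estimator under a unique name, then retrieve it.
    lin_reg = td_sklearn.deploy("linreg_model_ver_2", LinearRegression())
    restored = td_sklearn.load("linreg_model_ver_2")
    print(restored)  # LinearRegression()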
+    def deploy(self, model_name, replace_if_exists=False):
+        """
+        DESCRIPTION:
+            Deploys the model held by the interface object to Vantage.
+
+        PARAMETERS:
+            model_name:
+                Required Argument.
+                Specifies the unique name of the model to be deployed.
+                Types: str
+
+            replace_if_exists:
+                Optional Argument.
+                Specifies whether to replace the model if a model with the same name already
+                exists in Vantage. If this argument is set to False and a model with the same
+                name already exists, then the function raises an exception.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            The opensource object wrapper.
+
+        RAISES:
+            TeradataMlException if a model with "model_name" already exists and the argument
+            "replace_if_exists" is set to False.
+
+        EXAMPLES:
+            >>> from teradataml import td_sklearn
+            >>> model = td_sklearn.LinearRegression(normalize=True)
+            >>> model
+            LinearRegression(normalize=True)
+
+            # Example 1: Deploy the model held by the interface object to Vantage.
+            >>> lin_reg = model.deploy("linreg_model_ver_2")
+            Model is saved.
+            >>> lin_reg
+            LinearRegression(normalize=True)
+
+            # Example 2: Deploy the model held by the interface object to Vantage with the
+            # same name as a model that already exists in Vantage.
+            >>> lin_reg = model.deploy("linreg_model_ver_2", replace_if_exists=True)
+            Model is deleted.
+            Model is saved.
+            >>> lin_reg
+            LinearRegression(normalize=True)
         """
-        if isinstance(self.modelObj, pd.DataFrame):
-            # Get list of unique partition values and corresponding model object as dict.
-            partition_values_model_dict = {}
-            obj_list = self.modelObj.values.tolist()
-            for lst in obj_list:
-                partition_values_model_dict[tuple(lst[:len(lst)-1])] = lst[len(lst)-1]
 
-
-
-            # dump sklearn object into it. Finally install the file to Vantage.
-            partition_join = "_".join([str(x) for x in partition])
-            file_name = f"{self._model_file_name_prefix}_{partition_join}"
-            # Replace '-' with '_' as '-' can't be present in file identifier.
-            # Needed this replace because partition_columns can be negative.
-            file_name = file_name.replace("-", "_")
-            full_file_name = os.path.join(self._tdml_tmp_dir, file_name)
-            with open(full_file_name, "wb+") as fp:
-                # Write sklearn object to file.
-                if isinstance(self.modelObj, pd.DataFrame):
-                    # If multiple models, then write the model corresponding to the partition value.
-                    fp.write(pickle.dumps(partition_values_model_dict[tuple(partition)]))
-                else:
-                    fp.write(pickle.dumps(self.modelObj))
-            self.model_file_paths_local.add(file_name)
+        # Install the model file into Vantage, if not installed.
+        self._install_initial_model_file()
 
-
-            is_binary=True,
-            file_location=self._tdml_tmp_dir)
+        self._save_model(model_name, replace_if_exists)
+        return self
 
-        if self._is_lake_system:
-            # Need to pass env_name along with file_name for cleaning up the files in env.
-            obj = f"{self._env.env_name}::{file_name}"
-            if installed_model_files[obj] == 0:
-                # Add to GC for the first time the model file (along with env name) is encountered.
-                installed_model_files[obj] = 1
-                GarbageCollector._add_to_garbagecollector(object_name=obj,
-                                                          object_type=TeradataConstants.TERADATA_APPLY)
-        else:
-            if installed_model_files[file_name] == 0:
-                # Add to GC for the first time the model file is encountered.
-                installed_model_files[file_name] = 1
-                GarbageCollector._add_to_garbagecollector(object_name=file_name,
-                                                          object_type=TeradataConstants.TERADATA_SCRIPT)
 
-
+class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
+
+    OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.SKLEARN
+
+    def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
+        super().__init__(model=model, module_name=module_name, class_name=class_name,
+                         pos_args=pos_args, kwargs=kwargs)
+
+        self._initialize_variables(table_name_prefix="td_sklearn_")
+        if model is not None:
+            self.modelObj = model
+            self.module_name = model.__module__.split("._")[0]
+            self.class_name = model.__class__.__name__
+            # __dict__ gets all the arguments as a dictionary, including default ones and
+            # positional args.
+            self.kwargs = model.__dict__
+            self.pos_args = tuple()  # Kept empty as all are moved to kwargs.
+        else:
+            self._initialize_object()
+
+    def _validate_args_and_get_data(self, X=None, y=None, groups=None, kwargs={},
+                                    skip_either_or_that=False):
+        """
+        Internal function to validate arguments passed to exposed opensource APIs and return
+        the parent DataFrame, feature columns, label columns, group columns, and data
+        partition columns.
+        """
+        _validate_opensource_func_args(X=X, y=y, groups=groups,
+                                       fit_partition_cols=self._fit_partition_colums_non_default,
+                                       kwargs=kwargs,
+                                       skip_either_or_that=skip_either_or_that)
+        return _derive_df_and_required_columns(X=X, y=y, groups=groups, kwargs=kwargs,
+                                               fit_partition_cols=self._fit_partition_colums_non_default)
 
     def _run_fit_related_functions(self,
                                    data,
@@ -814,7 +1202,8 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                    label_columns,
                                    partition_columns,
                                    func,
-                                   classes=None
+                                   classes=None,
+                                   file_name="sklearn_fit.py"):
         """
         Internal function to run fit() and partial_fit() functions.
         """
@@ -829,9 +1218,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in new_partition_columns] + [("model", model_type)]
 
-        file_name = "sklearn_fit.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
-
         if classes:
             class_type = type(classes[0]).__name__
             classes = "--".join([str(x) for x in classes])
@@ -857,13 +1243,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         self._model_data = self._run_script(data, script_command, new_partition_columns,
                                             return_types)
 
-
-        self.extract_sklearn_obj(n_unique_partitions=len(self._fit_partition_unique_values),
-                                 n_partition_cols=len(new_partition_columns))
-
-        # Need this label columns types in prediction.
-        self._fit_label_columns_types = [data._td_column_names_and_sqlalchemy_types[l_c.lower()]
-                                         for l_c in label_columns]
+        self._assign_fit_variables_after_execution(data, new_partition_columns, label_columns)
 
     def partial_fit(self, X=None, y=None, classes=None, **kwargs):
         """
@@ -911,11 +1291,19 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         self._is_default_partition_value_fit = False
         self._fit_partition_colums_non_default = partition_columns
 
-
-
-
-
-
+        file_name = kwargs.pop("file_name", None)
+        func_name = kwargs.pop("name", "fit")
+
+        args = {"data": data,
+                "feature_columns": feature_columns,
+                "label_columns": label_columns,
+                "partition_columns": partition_columns,
+                "func": func_name}
+
+        if file_name is not None:
+            args["file_name"] = file_name
+
+        self._run_fit_related_functions(**args)
 
         self._fit_execution_time = time.time() - st_time
 
@@ -980,10 +1368,130 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         return super().__getattr__(name)
 
+    def _special_handling_multimodel_(self, data, feature_columns, label_columns, partition_columns,
+                                      func_name, **kwargs):
+        """
+        Internal function to handle the multi-model case of the transform function for the
+        functions ["SelectFpr", "SelectFdr", "SelectFwe", "SelectFromModel", "RFECV"] of the
+        feature_selection module and "Birch" of the cluster module.
+        These functions generate multiple models, and when transform is applied to each
+        model, it generates output with a different number of columns.
+        """
+        skl_objs_dict = {}
+        no_of_unique_partitions = len(self._fit_partition_unique_values)
+        no_of_partitioning_cols = len(self._fit_partition_unique_values[0])
+
+        # Run on 10 rows of data individually using the corresponding scikit-learn objects,
+        # based on partition value, and get the maximum number of columns and their types.
+        for i in range(no_of_unique_partitions):
+            skl_objs_dict[tuple(self.modelObj.iloc[i, :no_of_partitioning_cols])] = self.modelObj.iloc[i]["model"]
+
+        data = data.select(feature_columns + label_columns + partition_columns)
+        ten_row_data = data.head(10).get_values()
+        X = numpy.array(ten_row_data)
+
+        # For the multi-model case, the model on one AMP can give more columns than models
+        # on other AMPs. The RETURNS clause can't contain a different number of columns on
+        # different AMPs. Hence, taking the maximum number of columns and their types
+        # across all models.
+        max_no_of_columns = 0
+        max_col_names = []
+        max_col_types = []
+
+        def _get_input_row_without_nans(row):
+            """
+            `inverse_transform` input should not contain NaNs. Hence, removing NaNs from the row.
+            """
+            X1 = []
+            for _, v in enumerate(row):
+                if isinstance(v, type(None)) or isinstance(v, str) or not math.isnan(v) or self.module_name == "sklearn.impute":
+                    # Add to the list when:
+                    # - v is None, or
+                    # - v is a string, or
+                    # - v is not NaN, or
+                    # - the module is impute (which transforms NaN values), even though v is NaN.
+                    X1.append(v)
+                else:
+                    # Skip NaN values.
+                    pass
+            return X1
+
+        for i in range(X.shape[0]):
+            # Run `transform` or `inverse_transform` on each row with the corresponding
+            # scikit-learn model object.
+            partition_values = tuple(X[i, -no_of_partitioning_cols:])
+            skl_obj = skl_objs_dict[partition_values]
+
+            X1 = X[i, :-no_of_partitioning_cols]
+            # Since NaNs/NULLs are added in transform for the last columns where some models
+            # generated fewer columns, removing NaNs/NULLs from the input row for
+            # inverse_transform using the function _get_input_row_without_nans().
+            X1 = numpy.array([_get_input_row_without_nans(X1)])
+
+            trans_opt = getattr(skl_obj, func_name)(X1, **kwargs)
+
+            no_of_columns = 1
+
+            if trans_opt.shape == (X1.shape[0],):
+                trans_opt = trans_opt.reshape(X1.shape[0], 1)
+
+            if isinstance(trans_opt[0], numpy.ndarray) \
+                    or isinstance(trans_opt[0], list) \
+                    or isinstance(trans_opt[0], tuple):
+                no_of_columns = len(trans_opt[0])
+
+            col_names = [f"{self.class_name.lower()}_{func_name}_{(i + 1)}" for i in range(no_of_columns)]
+
+            # Get new column sqlalchemy types for the pandas df columns of the transform output.
+            opt_pd = pd.DataFrame(trans_opt)
+
+            # Get output column types for each column in the pandas df from the output of
+            # the transform-type functions.
+            types = {}
+            for idx in range(no_of_columns):
+                col = list(opt_pd.columns)[idx]
+
+                # Only one row in trans_opt.
+                if isinstance(trans_opt[0], numpy.ndarray) or isinstance(trans_opt[0], tuple) or isinstance(trans_opt[0], list):
+                    type_ = type(trans_opt[0][idx])
+                else:
+                    # Only one value in the output.
+                    type_ = type(trans_opt[0])
+
+                # If the type of the output value (trans_opt) is None, then use `str` as
+                # the type, since pandas astype() does not accept None type.
+                if type_ is type(None):
+                    type_ = str
+
+                # numpy integer columns with NaN values can't be typecast using pd.astype()
+                # to int64. It raises an error like "Cannot convert non-finite values
+                # (NA or inf) to integer: Error while type casting for column '2'".
+                # Hence, using pd.Int64Dtype() for integer columns with NaN values.
+                types[col] = type_ if type_ not in [int, numpy.int64] else pd.Int64Dtype()
+
+            # Without this, all columns will be of object type and get converted to VARCHAR
+            # in Vantage.
+            opt_pd = opt_pd.astype(types)
+
+            # If the datatype is not specified, then check whether the datatype is
+            # datetime64 and a timezone is present; if so, map it to
+            # TIMESTAMP(timezone=True), else map it according to the default value.
+            col_types = [TIMESTAMP(timezone=True)
+                         if pt.is_datetime64_ns_dtype(opt_pd.dtypes[key]) and (opt_pd[col_name].dt.tz is not None)
+                         else _get_sqlalchemy_mapping(str(opt_pd.dtypes[key]))
+                         for key, col_name in enumerate(list(opt_pd.columns))]
+
+            # Different models in the multi-model case can generate different numbers of
+            # output columns, for example in SelectFpr. Hence, taking the model which
+            # generates the maximum number of columns.
+            if no_of_columns > max_no_of_columns:
+                max_no_of_columns = no_of_columns
+                max_col_names = col_names
+                max_col_types = col_types
+
+        return [(c_name, c_type) for c_name, c_type in zip(max_col_names, max_col_types)]
+
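The essence of `_special_handling_multimodel_` is that per-partition models can emit different column counts while the script's RETURNS clause needs one fixed schema, so the widest output wins and narrower outputs are padded with NaNs/NULLs. A toy, self-contained illustration of that width computation (not the actual script plumbing):

    import numpy as np
    import pandas as pd

    # Pretend transform outputs from three per-partition models (ragged widths).
    outputs = {("p1",): np.array([[1.0, 2.0, 3.0]]),
               ("p2",): np.array([[4.0, 5.0]]),
               ("p3",): np.array([[6.0]])}

    # The widest model decides the schema, as in the max_no_of_columns logic above.
    max_cols = max(o.shape[1] for o in outputs.values())  # -> 3

    # Pad narrower outputs with NaN so every partition fits one fixed schema.
    padded = {k: np.pad(o, ((0, 0), (0, max_cols - o.shape[1])),
                        constant_values=np.nan)
              for k, o in outputs.items()}
    print(pd.DataFrame(np.vstack(list(padded.values()))))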
     def _get_return_columns_for_function_(self,
                                           data,
                                           feature_columns,
                                           label_columns,
+                                          partition_columns,
                                           func_name,
                                           kwargs):
         """
@@ -997,7 +1505,8 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}",
                  data._td_column_names_and_sqlalchemy_types[col.lower()])
                 for i, col in enumerate(label_columns)]
-
+
+        if func_name == "predict" and self.OPENSOURCE_PACKAGE_NAME == OpenSourcePackage.SKLEARN:
         """
         Return predict columns using either label_columns (if provided) or
         self._fit_label_columns_types (if the function is trained using label columns).
@@ -1012,8 +1521,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}", col_type)
                 for i, col_type in enumerate(self._fit_label_columns_types)]
 
-        data = data.select(feature_columns + label_columns)
-
         ## If the function is not `fit_predict`:
         # then take one row of transform/other functions to execute on the client
         # to get the number of columns in the return clause and their Vantage types.
@@ -1027,8 +1534,20 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             skl_obj = self.modelObj
         else:
             # Multi-model case.
+            if (func_name in ["transform", "inverse_transform"] and \
+                    self.class_name in ["SelectFpr", "SelectFdr", "SelectFwe", "SelectFromModel", "RFECV", "Birch"]) or \
+                    (self.module_name == "lightgbm.sklearn" and self.class_name == "LGBMClassifier"):
+                # Special handling of the multi-model case for the transform function, as
+                # these classes generate transform output with a different number of columns
+                # for each model. Hence, NULLs/NaNs need to be added to columns which are not
+                # present in the transform output of some models.
+                return self._special_handling_multimodel_(data, feature_columns, label_columns,
+                                                          partition_columns, func_name, **kwargs)
+
             skl_obj = self.modelObj.iloc[0]["model"]
 
+        data = data.select(feature_columns + label_columns)
+
         ten_row_data = data.head(10).get_values()
         X = numpy.array(ten_row_data)
         if label_columns:
@@ -1122,7 +1641,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # It raises an error like "Cannot convert non-finite values (NA or inf) to integer:
         # Error while type casting for column '2'"
         # Hence, using pd.Int64Dtype() for integer columns with NaN values.
-        types[col] = type_ if type_
+        types[col] = type_ if type_ not in [int, numpy.int64] else pd.Int64Dtype()
 
         # Without this, all columns will be of object type and get converted to VARCHAR in Vantage.
         opt_pd = opt_pd.astype(types)
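The `pd.Int64Dtype()` change above works around a real pandas restriction that is easy to reproduce in isolation: integer data that picked up NaN cannot be cast to plain int64, but the nullable Int64 extension dtype accepts it.

    import numpy as np
    import pandas as pd

    s = pd.Series([1, 2, np.nan])        # NaN forces the column to float64
    try:
        s.astype(np.int64)               # ValueError: cannot convert non-finite values
    except ValueError as err:
        print(err)
    print(s.astype(pd.Int64Dtype()))     # 1, 2, <NA> -- nullable integer column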
@@ -1137,7 +1656,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return [(c_name, c_type) for c_name, c_type in zip(col_names, col_types)]
 
     @_validate_fit_run
-    def _run_function_needing_all_rows(self, X=None, y=None, **kwargs):
+    def _run_function_needing_all_rows(self, X=None, y=None, file_name="sklearn_score.py", **kwargs):
         """
         Internal function to run functions like score, aic, and bic, which need all rows and
         return one floating-point number as the result.
@@ -1160,9 +1679,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                          label_columns,
                                          partition_columns)
 
-        file_name = "sklearn_score.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
-
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
 
@@ -1180,7 +1696,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in new_partition_columns] + [(func_name, FLOAT())]
 
-
+        # Check the trained model installation. If not installed,
+        # install it and set the flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
 
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
 
@@ -1194,7 +1714,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return opt
 
     @_validate_fit_run
-    def _transform(self, X=None, y=None, **kwargs):
+    def _transform(self, X=None, y=None, file_name="sklearn_transform.py", **kwargs):
         """
         Internal function to run predict/transform and similar functions, which return
         multiple columns. This function will return the data row along with the generated
@@ -1217,19 +1737,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                          partition_columns)
 
         # Since kwargs are passed to transform, removing additional unrelated arguments from kwargs.
-
-        kwargs.pop("data")
-        if "feature_columns" in kwargs:
-            kwargs.pop("feature_columns")
-        if "group_columns" in kwargs:
-            kwargs.pop("group_columns")
-        if "partition_columns" in kwargs:
-            kwargs.pop("partition_columns")
-        if "label_columns" in kwargs:
-            kwargs.pop("label_columns")
-
-        file_name = "sklearn_transform.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+        self._remove_data_related_args_from_kwargs(kwargs)
 
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
@@ -1239,26 +1747,42 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         self._validate_unique_partition_values(data, new_partition_columns)
 
-
-
-
-            f"{self._model_file_name_prefix} {self._is_lake_system}"
+        return_columns_python_types = None
+        if self._fit_label_columns_python_types:
+            return_columns_python_types = '--'.join(self._fit_label_columns_python_types)
 
         # Returning feature columns also along with transformed columns, because we don't
         # know the mapping of feature columns to the transformed columns.
-
-
+        ## 'correct_covariance()' returns the (n_features, n_features) matrix.
+        if func_name == "correct_covariance":
+            return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                            for col in new_partition_columns]
+        else:
+            return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                            for col in (new_partition_columns + feature_columns)]
         if func_name in ["predict", "decision_function"] and label_columns:
             return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                              for col in label_columns]
-        return_types += self._get_return_columns_for_function_(data,
-                                                               feature_columns,
-                                                               label_columns,
-                                                               func_name,
-                                                               kwargs)
 
-
+        output_cols_types = self._get_return_columns_for_function_(data,
+                                                                   feature_columns,
+                                                                   label_columns,
+                                                                   new_partition_columns,
+                                                                   func_name,
+                                                                   kwargs)
+        return_types += output_cols_types
+
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
+                         f"{self._model_file_name_prefix} {len(output_cols_types)} {self._is_lake_system} " \
+                         f"{return_columns_python_types}"
+
+        # Check the trained model installation. If not installed,
+        # install it and set the flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
 
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
 
@@ -1294,6 +1818,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types += self._get_return_columns_for_function_(data,
                                                                feature_columns,
                                                                label_columns,
+                                                               new_partition_columns,
                                                                func_name,
                                                                {})
         else:
@@ -1302,7 +1827,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types += [(f"{self.class_name.lower()}_{func_name}_1", FLOAT())]
 
         file_name = "sklearn_fit_predict.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
 
         data_column_types_str, partition_indices_str, _, new_partition_columns = \
             self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
@@ -1317,7 +1841,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get unique values in partitioning columns.
         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
 
-
+        # Check the trained model installation. If not installed,
+        # install it and set the flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
 
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
 
@@ -1376,14 +1904,10 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                          skip_either_or_that=True)
 
         # Remove the kwargs data.
-
-        partition_cols = kwargs.pop("partition_columns", None)
-        feature_cols = kwargs.pop("feature_columns", None)
-        label_cols = kwargs.pop("label_columns", None)
+        self._remove_data_related_args_from_kwargs(kwargs)
 
         if partition_columns:
             # kwargs are passed to the kneighbors function. So, removing them from kwargs.
-            kwargs.pop("partition_columns")
             self._is_default_partition_value_fit = False
 
         # Generating new partition column name.
@@ -1395,7 +1919,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         args_str = self._get_kwargs_str(kwargs)
 
         file_name = "sklearn_neighbors.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
 
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
@@ -1429,7 +1952,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get unique values in partitioning columns.
         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
 
-
+        # Check the trained model installation. If not installed,
+        # install it and set the flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
 
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
 
@@ -1513,7 +2040,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                          group_columns)
 
         file_name = "sklearn_model_selection_split.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
 
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
@@ -1548,7 +2074,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get unique values in partitioning columns.
         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
 
-
+        # Check the trained model installation. If not installed,
+        # install it and set the flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
 
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
 
@@ -1562,154 +2092,69 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         return opt
 
-    def _get_returning_df(self, script_df, partition_column, returns):
-        """
-        Internal function to return the teradataml DataFrame except
-        partition_column.
-        """
-        if self._is_default_partition_value_fit:
-            # For the single-model case, the partition column is internally generated
-            # and there is no point in returning it to the user.
-
-            # Extract columns from return types.
-            returning_cols = [col[0] for col in returns[len(partition_column):]]
-            return script_df.select(returning_cols)
-        return script_df
 
-class
-    def __init__(self, module_name, func_name):
+class _FunctionWrapper(_GenericObjectWrapper):
+    def __init__(self, module_name, func_name, file_type, template_file):
         super().__init__()
-        self.
-        self.
-        self.
-        self.
-        self.
+        self._module_name = module_name
+        self._func_name = func_name
+        self._params = None
+        self._data_args = OrderedDict()
+        self._template_file = template_file
+        self._script_file_name = _generate_new_name(type=file_type, extension="py")
 
     def __call__(self, **kwargs):
         """
        Run the function with all the arguments passed from the `td_sklearn.<function_name>` function.
         """
-
-        partition_cols = self._get_columns_as_list(kwargs.get("partition_columns", None))
-        if partition_cols:
-            kwargs.pop("partition_columns")
-
-        # Separate dataframe related arguments and their column names from actual kwargs.
-        for k, v in kwargs.items():
-            if isinstance(v, DataFrame):
-                # All dataframes should be a select of the parent dataframe.
-                _validate_df_query_type(v, "select", k)
-
-                # Save all columns in dataframe related arguments.
-                __data_columns.extend(v.columns)
-
-                self.__data_args[k] = v
+        replace_dict, partition_cols = self._process_data_for_funcs_returning_objects(kwargs)
 
+        script_file_path = f"{self._script_file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{self._script_file_name}"
 
-
-        self._validate_existence_of_partition_columns(partition_cols, self.__tdml_df.columns)
-
-        self.__tdml_df = self.__tdml_df.select(__data_columns + partition_cols)
-
-        self.__tdml_df, partition_cols = self._get_data_and_data_partition_columns(self.__tdml_df,
-                                                                                   __data_columns,
-                                                                                   [],
-                                                                                   partition_cols
-                                                                                   )
-
-        # Prepare a string of data arguments with the name, the indices where columns of
-        # that argument reside, and the types of each of the columns.
-        data_args_str = self._prepare_data_args_string(kwargs)
+        model_file_prefix = None
+        if self._is_lake_system:
+            model_file_prefix = self._script_file_name.replace(".py", "")
 
-
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {model_file_prefix} {self._is_lake_system}"
 
-        data_column_types_str, partition_indices_str, _, partition_cols = \
-            self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df, partition_cols)
+        model_type = BLOB() if self._is_lake_system else CLOB()
 
-
-        py_exc = UtilFuncs._get_python_execution_path()
-        script_command = f"{py_exc} {script_file_path} {partition_indices_str} {data_column_types_str} {data_args_str}"
+        return_types = [(col, self._tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in partition_cols] + [(self._func_name, model_type)]
 
-
+        replace_dict.update({"<module_name>": self._module_name,
+                             "<func_name>": self._func_name,
+                             "<params>": json.dumps(kwargs)})
 
         # Generate a new file in the .teradataml directory and install it to Vantage.
-        self._prepare_and_install_file()
+        self._prepare_and_install_file(replace_dict=replace_dict)
+
+        try:
+            self._model_data = self._run_script(self._tdml_df, script_command, partition_cols, return_types)
+            self._model_data._index_label = None
 
-        self._model_data._index_label = None
+            fit_partition_unique_values = self._tdml_df.drop_duplicate(partition_cols).get_values()
 
-
+            self._extract_model_objs(n_unique_partitions=len(fit_partition_unique_values),
+                                     n_partition_cols=len(partition_cols))
 
+        except Exception as ex:
+            # File cleanup if script execution fails or we are unable to fetch modelObj.
+            os.remove(self._script_file_local)
+            self._remove_script_file(self._script_file_name)
+            raise
 
         # File cleanup after processing.
-        os.remove(self.
-
-                  force_remove=True)
+        os.remove(self._script_file_local)
+        self._remove_script_file(self._script_file_name)
 
         return self.modelObj
 
-    def _prepare_data_args_string(self, kwargs):
-        """
-        Get column indices and types of each data related argument in the format:
-        "{<arg_name>-<comma separated indices>-<comma separated types>}--
-         {<arg_name>-<comma separated indices>-<comma separated types>}"
-        """
-        data_args_str = []
-        for arg_name in list(self.__data_args.keys()):
-            # Remove DataFrame arguments from kwargs, which will be passed to Script.
-            kwargs.pop(arg_name)
-
-            # Get column indices and their types for each dataframe from the parent dataframe.
-            _, partition_indices_str, partition_types_str, _ = \
-                self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df,
-                                                                             self.__data_args[arg_name].columns,
-                                                                             idx_delim=",",
-                                                                             types_delim=",")
-
-            # Format "<arg_name>-<comma separated indices>-<comma separated types>"
-            data_args_str.append(f"{arg_name}-{partition_indices_str}-{partition_types_str}")
-
-        # Format "{<arg_name>-<comma separated indices>-<comma separated types>}--
-        # {<arg_name>-<comma separated indices>-<comma separated types>}"
-        return "--".join(data_args_str)
-
-    def _validate_existence_of_partition_columns(self, partition_columns, all_columns):
-        """
-        Validate whether columns in the "partition_columns" argument are present in any of
-        the given dataframes.
-        """
-        invalid_part_cols = [c for c in partition_columns if c not in all_columns]
-
-        if invalid_part_cols:
-            raise ValueError(Messages.get_message(MessageCodes.INVALID_PARTITIONING_COLS,
-                                                  ", ".join(invalid_part_cols),
-                                                  "', '".join(list(self.__data_args.keys())))
-                             )
-
-    def _prepare_and_install_file(self):
-        """
-        Prepare the function script file from the template file and install it in Vantage.
-        """
-        with open(os.path.join(self._scripts_path, "sklearn_function.template")) as fp:
-            script_data = fp.read()
-        script_data = script_data.replace("<module_name>", self.__module_name).\
-            replace("<func_name>", self.__func_name).replace("<params>", json.dumps(self.__params))
-
-        self._model_file_local = os.path.join(self._tdml_tmp_dir, self._model_file_name)
-
-        with open(self._model_file_local, "w") as fp:
-            fp.write(script_data)
-
-        self._install_script_file(file_identifier=self._model_file_name.split(".")[0],
-                                  file_name=self._model_file_name,
-                                  file_location=self._tdml_tmp_dir)
 
+class _SKLearnFunctionWrapper(_FunctionWrapper):
+    def __init__(self, module_name, func_name):
+        file_type = "file_fn_sklearn"
+        template_file = "sklearn_function.template"
+        super().__init__(module_name, func_name, file_type=file_type, template_file=template_file)