PyPI - teradataml - Versions diffs - 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl - Mend

teradataml 20.0.0.1-py3-none-any.whl → 20.0.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of teradataml might be problematic. Click here for more details.

Files changed (200)

teradataml/LICENSE.pdf +0 -0
teradataml/README.md +112 -0
teradataml/__init__.py +6 -3
teradataml/_version.py +1 -1
teradataml/analytics/__init__.py +3 -2
teradataml/analytics/analytic_function_executor.py +224 -16
teradataml/analytics/analytic_query_generator.py +92 -0
teradataml/analytics/byom/__init__.py +3 -2
teradataml/analytics/json_parser/metadata.py +1 -0
teradataml/analytics/json_parser/utils.py +6 -4
teradataml/analytics/meta_class.py +40 -1
teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
teradataml/analytics/sqle/__init__.py +10 -2
teradataml/analytics/table_operator/__init__.py +3 -2
teradataml/analytics/uaf/__init__.py +21 -2
teradataml/analytics/utils.py +62 -1
teradataml/analytics/valib.py +1 -1
teradataml/automl/__init__.py +1502 -323
teradataml/automl/custom_json_utils.py +139 -61
teradataml/automl/data_preparation.py +245 -306
teradataml/automl/data_transformation.py +32 -12
teradataml/automl/feature_engineering.py +313 -82
teradataml/automl/model_evaluation.py +44 -35
teradataml/automl/model_training.py +109 -146
teradataml/catalog/byom.py +8 -8
teradataml/clients/pkce_client.py +1 -1
teradataml/common/constants.py +37 -0
teradataml/common/deprecations.py +13 -7
teradataml/common/garbagecollector.py +151 -120
teradataml/common/messagecodes.py +4 -1
teradataml/common/messages.py +2 -1
teradataml/common/sqlbundle.py +1 -1
teradataml/common/utils.py +97 -11
teradataml/common/wrapper_utils.py +1 -1
teradataml/context/context.py +72 -2
teradataml/data/complaints_test_tokenized.csv +353 -0
teradataml/data/complaints_tokens_model.csv +348 -0
teradataml/data/covid_confirm_sd.csv +83 -0
teradataml/data/dataframe_example.json +10 -0
teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
teradataml/data/dwt2d_dataTable.csv +65 -0
teradataml/data/dwt_dataTable.csv +8 -0
teradataml/data/dwt_filterTable.csv +3 -0
teradataml/data/finance_data4.csv +13 -0
teradataml/data/grocery_transaction.csv +19 -0
teradataml/data/idwt2d_dataTable.csv +5 -0
teradataml/data/idwt_dataTable.csv +8 -0
teradataml/data/idwt_filterTable.csv +3 -0
teradataml/data/interval_data.csv +5 -0
teradataml/data/jsons/paired_functions.json +14 -0
teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
teradataml/data/load_example_data.py +8 -2
teradataml/data/naivebayestextclassifier_example.json +1 -1
teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
teradataml/data/peppers.png +0 -0
teradataml/data/real_values.csv +14 -0
teradataml/data/sax_example.json +8 -0
teradataml/data/scripts/deploy_script.py +1 -1
teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
teradataml/data/star_pivot.csv +8 -0
teradataml/data/templates/open_source_ml.json +2 -1
teradataml/data/teradataml_example.json +20 -1
teradataml/data/timestamp_data.csv +4 -0
teradataml/data/titanic_dataset_unpivoted.csv +19 -0
teradataml/data/uaf_example.json +55 -1
teradataml/data/unpivot_example.json +15 -0
teradataml/data/url_data.csv +9 -0
teradataml/data/windowdfft.csv +16 -0
teradataml/dataframe/copy_to.py +1 -1
teradataml/dataframe/data_transfer.py +5 -3
teradataml/dataframe/dataframe.py +474 -41
teradataml/dataframe/fastload.py +3 -3
teradataml/dataframe/functions.py +339 -0
teradataml/dataframe/row.py +160 -0
teradataml/dataframe/setop.py +2 -2
teradataml/dataframe/sql.py +658 -20
teradataml/dataframe/window.py +1 -1
teradataml/dbutils/dbutils.py +322 -16
teradataml/geospatial/geodataframe.py +1 -1
teradataml/geospatial/geodataframecolumn.py +1 -1
teradataml/hyperparameter_tuner/optimizer.py +13 -13
teradataml/lib/aed_0_1.dll +0 -0
teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
teradataml/options/__init__.py +3 -1
teradataml/options/configure.py +14 -2
teradataml/options/display.py +2 -2
teradataml/plot/axis.py +4 -4
teradataml/scriptmgmt/UserEnv.py +10 -6
teradataml/scriptmgmt/lls_utils.py +3 -2
teradataml/table_operators/Script.py +2 -2
teradataml/table_operators/TableOperator.py +106 -20
teradataml/table_operators/table_operator_util.py +88 -41
teradataml/table_operators/templates/dataframe_udf.template +63 -0
teradataml/telemetry_utils/__init__.py +0 -0
teradataml/telemetry_utils/queryband.py +52 -0
teradataml/utils/validators.py +1 -1
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0

teradataml/opensource/sklearn/_sklearn_wrapper.py CHANGED Viewed

@@ -49,7 +49,7 @@ from teradataml.opensource.sklearn.constants import OpenSourcePackage, _OSML_MOD
 from teradataml.common.messagecodes import MessageCodes
 from teradataml.common.messages import Messages
 from teradataml.catalog.byom import save_byom, retrieve_byom, delete_byom
-from teradataml.dbutils.dbutils import _create_table
+from teradataml.dbutils.dbutils import _create_table, set_session_param
 from teradataml.utils.validators import _Validators
 from teradataml.dataframe.dataframe import DataFrame
 from teradataml.dataframe.dataframe_utils import DataFrameUtils
@@ -64,6 +64,10 @@ validator = _Validators()
 installed_model_files = defaultdict(int)
+## Flag to ensure the sklearn script
+## installation occurs only once.
+_file_installed = False
 class _GenericObjectWrapper:
     def __init__(self) -> None:
         self._db_name = _get_current_databasename()
@@ -86,43 +90,24 @@ class _GenericObjectWrapper:
             if configure.openml_user_env is not None:
                 self._env = configure.openml_user_env
             else:
-                self._create_or_get_env()
+                self._env = UtilFuncs._create_or_get_env("open_source_ml.json")
         else:
-            execute_sql(f"SET SESSION SEARCHUIFDBPATH = {self._db_name};")
-    def _create_or_get_env(self):
-        """
-        Internal function to return the env if already exists else
-        creates the environment using template file and return the env.
-        """
-        # Get the template file path.
-        template_dir_path = os.path.join(_TDML_DIRECTORY, "data", "templates",
-                                         "open_source_ml.json")
+            set_session_param("searchuifdbpath",self._db_name)
-        # Read template file.
-        with open(template_dir_path, "r") as r_file:
-            data = json.load(r_file)
+        global _file_installed
+        ## Flag to check whether trained model is installed or not.
+        self._is_trained_model_installed = False
-        # Get env_name.
-        _env_name = data["env_specs"][0]["env_name"]
+        ## Install all sklearn script files on Vantage.
+        if not _file_installed:
+            sklearn_script_files = ["sklearn_fit.py", "sklearn_score.py",
+                                    "sklearn_transform.py", "sklearn_fit_predict.py",
+                                    "sklearn_neighbors.py", "sklearn_model_selection_split.py"]
+            for script_file in sklearn_script_files:
+                self._install_script_file(file_identifier=script_file.split(".")[0],
+                                          file_name=script_file)
-        try:
-            # Call function to 'openml_env' get env.
-            self._env = get_env(_env_name)
-        except TeradataMlException as tdml_e:
-            # We will get here when error says, env does not exist otherwise raise the exception as is.
-            # Env does not exist so create one.
-            exc_msg = "Failed to execute get_env(). User environment '{}' not " \
-                      "found.".format(_env_name)
-            if exc_msg in tdml_e.args[0]:
-                print(f"No OpenAF environment with name '{_env_name}' found. Creating one with "\
-                      "latest supported python and required packages.")
-                _env = create_env(template=template_dir_path)
-            else:
-                raise tdml_e
-        except Exception as exc:
-            raise exc
+            _file_installed = True
     def _get_columns_as_list(self, cols):
         """
@@ -205,13 +190,31 @@ class _GenericObjectWrapper:
                                   is_binary=is_binary)
         else:
             status = self._env.install_file(file_path=new_script,
-                                       replace=True,
-                                       suppress_output=True)
+                                            replace=True,
+                                            suppress_output=True)
         if not status:
             raise TeradataMlException(
                 f"Script file '{file_name}' failed to get installed/replaced in Vantage."
             )
+    def _remove_script_file(self, file_name):
+        """
+        Internal function to remove script file in Vantage.
+        """
+        # _env is set while object creation
+        # If not set, it is Vantage Enterprise. Otherwise, it is Vantage Lake.
+        if not self._is_lake_system:
+            status = remove_file(file_identifier=file_name.split(".")[0],
+                                 force_remove=True,
+                                 suppress_output=True)
+        else:
+            status = self._env.remove_file(file_name=file_name,
+                                           suppress_output=True)
+        if not status:
+            raise TeradataMlException(
+                f"Script file '{file_name}' failed to remove in Vantage."
+            )
     def _get_data_col_types_and_partition_col_indices_and_types(self, data, partition_columns,
                                                                 idx_delim=",",
                                                                 types_delim="--"):
@@ -370,6 +373,23 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
         Internal function to get attributes of all sklearn model objects when multiple models are
         generated by fit.
         """
+        def __generate_model_object(model_obj_value):
+            """
+            Internal function to generate _SkLearnWrapperObject model object from model_obj_value.
+            """
+            # Create _SkLearnObjectWrapper object from opensource model object.
+            model_obj = self.__class__(model=first_atrribute_instance)
+            model_obj.modelObj = model_obj_value
+            model_obj._is_model_installed = True
+            # Setting other model attributes.
+            model_obj._is_default_partition_value_fit = self._is_default_partition_value_fit
+            model_obj._is_default_partition_value_predict = self._is_default_partition_value_predict
+            model_obj._fit_partition_colums_non_default = self._fit_partition_colums_non_default
+            model_obj._fit_partition_unique_values = self._fit_partition_unique_values
+            return model_obj
         # Wrapper function to invoke dynamic method, using arguments
         # passed by user, on model in each row.
         def __sklearn_method_invoker_for_multimodel(*c, **kwargs):
@@ -377,36 +397,58 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
             for i in range(multi_models.shape[0]):
                 curr_model = multi_models.iloc[i]["model"]
                 multi_models.at[i, "model"] = getattr(curr_model, name)(*c, **kwargs)
+            first_function_instance = multi_models.at[0, "model"]
+            if self.__class__._validate_model_supportability(first_function_instance):
+                return __generate_model_object(multi_models)
             return multi_models.rename(columns={"model": name})
-        # Identify if attribute is callable or not to avoid
-        # this check in loop for every model.
-        is_attr_callable = False
         # Assuming that self.modelObj will have at least 1 row.
-        is_attr_callable = callable(getattr(self.modelObj.iloc[0]["model"], name))
-        # If attribute is callable, it should be applied on model in each row
+        # Get attribute instance from first model object.
+        first_atrribute_instance = getattr(self.modelObj.iloc[0]["model"], name)
+        # If first_atrribute_instance is callable, it should be applied on model in each row
         # using passed arguments.
-        if is_attr_callable:
+        if callable(first_atrribute_instance):
             return __sklearn_method_invoker_for_multimodel
         output_attributes = self.modelObj.copy()
         for i in range(output_attributes.shape[0]):
             model = output_attributes.iloc[i]["model"]
             output_attributes.at[i, "model"] = getattr(model, name)
+        if self.__class__._validate_model_supportability(first_atrribute_instance):
+            return __generate_model_object(output_attributes)
         return output_attributes.rename(columns={"model": name})
     def __getattr__(self, name):
         # This just run attributes (functions and properties) from sklearn object.
         def __sklearn_method_invoker(*c, **kwargs):
-            return atrribute_instance(*c, **kwargs)
+            # sklearn model is returned from the function call. Create _SkLearnObjectWrapper object.
+            model_obj = attribute_instance(*c, **kwargs)
+            if self.__class__._validate_model_supportability(model_obj):
+                model_obj = self.__class__(model=model_obj)
+                model_obj._is_model_installed = True # Trained model is returned by function call.
+            return model_obj
         if isinstance(self.modelObj, pd.DataFrame):
             return self.__get_obj_attributes_multi_model(name)
-        atrribute_instance = getattr(self.modelObj, name)
-        if callable(atrribute_instance):
+        attribute_instance = getattr(self.modelObj, name)
+        if callable(attribute_instance):
             return __sklearn_method_invoker
-        return atrribute_instance
+        if self.__class__._validate_model_supportability(attribute_instance):
+            # sklearn model is returned from the attribute. Create _SkLearnObjectWrapper object.
+            model_obj = self.__class__(model=attribute_instance)
+            model_obj._is_model_installed = True # Trained model is returned as attribute.
+            return model_obj
+        return attribute_instance
     @classmethod
     def _validate_model_supportability(cls, model):
@@ -417,15 +459,25 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
         error_msg = Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED, "validate",
                                          "The given model is not a supported opensource model.")
         msg_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
+        package_name = None
+        class_name = None
         try:
             # For scikit-learn, model.__module__ is similar to 'sklearn.linear_model._base'.
             # TODO: check for other supported packages.
-            if model.__module__.split(".")[0] not in OpenSourcePackage.values():
-                raise TeradataMlException(error_msg, msg_code)
+            if hasattr(model, "__module__"):
+                package_name = model.__module__.split(".")[0]
+                if package_name not in OpenSourcePackage.values():
+                    return False
+            if hasattr(model, "__class__"):
+                class_name = model.__class__.__name__
         except Exception as ex:
             # If in case, model.__module__ fails.
             raise TeradataMlException(error_msg, msg_code) from ex
+        # True only if package name is opensource package name and class name is not internal class.
+        return True if package_name and class_name and \
+            package_name == cls.OPENSOURCE_PACKAGE_NAME.value and not class_name.startswith("_") else False
     def _save_model(self, model_name, replace_if_exists=False):
         """
         Internal function to save the model stored in file at location mentioned by class variable
@@ -436,7 +488,8 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
         conn = get_connection()
         osml_models_table_exists = conn.dialect.has_table(conn,
                                                           table_name=_OSML_MODELS_TABLE_NAME,
-                                                          schema=self._db_name)
+                                                          schema=self._db_name,
+                                                          table_only=True)
         if not osml_models_table_exists:
             all_columns = _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT.copy()
             all_columns.update(_OSML_ADDITIONAL_COLUMN_TYPES)
@@ -484,7 +537,11 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
         Internal function to create an instance of the class using the model and deploy
         the model to Vantage.
         """
-        cls._validate_model_supportability(model=model)
+        is_model_supportable = cls._validate_model_supportability(model=model)
+        if not is_model_supportable:
+            raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED,
+                                                           "deploy", "The given model is not a supported opensource model."),
+                                      MessageCodes.MODEL_CATALOGING_OPERATION_FAILED)
         cls = cls(model=model)
         # Load the model file into Vantage node as file can be used in
@@ -830,7 +887,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                         for col in new_partition_columns] + [("model", model_type)]
         file_name = "sklearn_fit.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
         if classes:
             class_type = type(classes[0]).__name__
@@ -865,6 +921,13 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         self._fit_label_columns_types = [data._td_column_names_and_sqlalchemy_types[l_c.lower()]
                                          for l_c in label_columns]
+        # If the model is trained a second time after the object creation,
+        # or if set_params() is called after the first model training,
+        # this flag will reset to False. So that for subsequent predict/score
+        # operations, the newly trained model will be installed.
+        if self._is_trained_model_installed:
+            self._is_trained_model_installed = False
     def partial_fit(self, X=None, y=None, classes=None, **kwargs):
         """
         Please check the description in Docs/OpensourceML/sklearn.py.
@@ -1122,7 +1185,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             # It raises error like "Cannot convert non-finite values (NA or inf) to integer:
             #                       Error while type casting for column '2'"
             # Hence, using pd.Int64Dtype() for integer columns with nan values.
-            types[col] = type_ if type_ != numpy.int64 else pd.Int64Dtype()
+            types[col] = type_ if type_ not in [int, numpy.int64] else pd.Int64Dtype()
         # Without this, all columns will be of object type and gets converted to VARCHAR in Vantage.
         opt_pd = opt_pd.astype(types)
@@ -1161,7 +1224,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                                                                 partition_columns)
         file_name = "sklearn_score.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
@@ -1180,7 +1242,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in new_partition_columns] + [(func_name, FLOAT())]
-        self._install_initial_model_file()
+        # Checking the trained model installation. If not installed,
+        # install it and set flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
@@ -1229,7 +1295,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             kwargs.pop("label_columns")
         file_name = "sklearn_transform.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
@@ -1251,14 +1316,18 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         if func_name in ["predict", "decision_function"] and label_columns:
             return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                              for col in label_columns]
         return_types += self._get_return_columns_for_function_(data,
                                                                feature_columns,
                                                                label_columns,
                                                                func_name,
                                                                kwargs)
-        # Installing model files before running sklearn_transform.py.
-        self._install_initial_model_file()
+        # Checking the trained model installation. If not installed,
+        # install it and set flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
@@ -1302,7 +1371,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             return_types += [(f"{self.class_name.lower()}_{func_name}_1", FLOAT())]
         file_name = "sklearn_fit_predict.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
         data_column_types_str, partition_indices_str, _, new_partition_columns = \
             self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
@@ -1317,7 +1385,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get unique values in partitioning columns.
         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
-        self._install_initial_model_file()
+        # Checking the trained model installation. If not installed,
+        # install it and flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
@@ -1395,7 +1467,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         args_str = self._get_kwargs_str(kwargs)
         file_name = "sklearn_neighbors.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
@@ -1429,7 +1500,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get unique values in partitioning columns.
         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
-        self._install_initial_model_file()
+        # Checking the trained model installation. If not installed,
+        # install it and set flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
@@ -1513,7 +1588,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                                                                 group_columns)
         file_name = "sklearn_model_selection_split.py"
-        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
@@ -1548,7 +1622,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get unique values in partitioning columns.
         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
-        self._install_initial_model_file()
+        # Checking the trained model installation. If not installed,
+        # install it and set flag to True.
+        if not self._is_trained_model_installed:
+            self._install_initial_model_file()
+            self._is_trained_model_installed = True
         opt = self._run_script(data, script_command, new_partition_columns, return_types)
@@ -1633,11 +1711,19 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
         script_file_path = f"{self._model_file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{self._model_file_name}"
+        model_file_prefix = None
+        if self._is_lake_system:
+            model_file_prefix = self._model_file_name.replace(".py", "")
         py_exc = UtilFuncs._get_python_execution_path()
-        script_command = f"{py_exc} {script_file_path} {partition_indices_str} {data_column_types_str} {data_args_str}"
+        script_command = (f"{py_exc} {script_file_path} {partition_indices_str} "\
+                          f"{data_column_types_str} {data_args_str} {self._is_lake_system}"\
+                          f" {model_file_prefix}")
-        return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
-                        for col in partition_cols] + [(self.__func_name, CLOB())]
+        model_type = BLOB() if self._is_lake_system else CLOB()
+        return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in partition_cols] + [(self.__func_name, model_type)]
         # Generate new file in .teradataml directory and install it to Vantage.
         self._prepare_and_install_file()
@@ -1652,8 +1738,7 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
         # File cleanup after processing.
         os.remove(self._model_file_local)
-        remove_file(file_identifier=self._model_file_name.split(".")[0], suppress_output=True,
-                    force_remove=True)
+        self._remove_script_file(self._model_file_name)
         return self.modelObj
@@ -1697,7 +1782,7 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
     def _prepare_and_install_file(self):
         """
-        Prepare function script file from template file and install it in Vaantage.
+        Prepare function script file from template file and install it in Vantage.
         """
         with open(os.path.join(self._scripts_path, "sklearn_function.template")) as fp:
             script_data = fp.read()

teradataml/options/__init__.py CHANGED Viewed

@@ -1,11 +1,13 @@
+from teradataml.common.deprecations import argument_deprecation
 from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.messagecodes import MessageCodes
 from teradataml.common.messages import Messages
 from teradataml.options.configure import configure
 from teradataml.utils.internal_buffer import _InternalBuffer
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
+@argument_deprecation("future", ["auth_token", "ues_url"], False, None)
 @collect_queryband(queryband="StCnfgPrms")
 def set_config_params(**kwargs):
     """

teradataml/options/configure.py CHANGED Viewed

@@ -58,6 +58,7 @@ class _Configure(_ConfigureSuper):
     indb_install_location = _create_property('indb_install_location')
     openml_user_env = _create_property('openml_user_env')
     local_storage = _create_property('local_storage')
+    stored_procedure_install_location = _create_property('stored_procedure_install_location')
     def __init__(self, default_varchar_size=1024, column_casesensitive_handler = False,
                  vantage_version="vantage1.1", val_install_location=None,
@@ -66,7 +67,7 @@ class _Configure(_ConfigureSuper):
                  read_nos_function_mapping="read_nos", write_nos_function_mapping="write_nos",
                  cran_repositories=None, inline_plot=True,
                  indb_install_location="/var/opt/teradata/languages/sles12sp3/Python/",
-                 openml_user_env=None, local_storage=None):
+                 openml_user_env=None, local_storage=None, stored_procedure_install_location="SYSLIB"):
         """
         PARAMETERS:
@@ -170,6 +171,16 @@ class _Configure(_ConfigureSuper):
                 Example:
                     # Set the garbage collector location to "/Users/gc/"
                     teradataml.options.configure.local_storage = "/Users/gc/"
+            stored_procedure_install_location:
+                Specifies the name of the database where stored procedures
+                are installed.
+                Types: string
+                Example:
+                    # Set the Stored Procedure install location to 'SYSLIB'
+                    # when stored procedures are installed in 'SYSLIB'.
+                    teradataml.options.configure.stored_procedure_install_location = "SYSLIB"
         """
         super().__init__()
         super().__setattr__('default_varchar_size', default_varchar_size)
@@ -187,6 +198,7 @@ class _Configure(_ConfigureSuper):
         super().__setattr__('indb_install_location', indb_install_location)
         super().__setattr__('openml_user_env', openml_user_env)
         super().__setattr__('local_storage', local_storage)
+        super().__setattr__('stored_procedure_install_location', stored_procedure_install_location)
         # internal configurations
         # These configurations are internal and should not be
@@ -301,7 +313,7 @@ class _Configure(_ConfigureSuper):
                           'read_nos_function_mapping', 'write_nos_function_mapping',
                           '_byom_model_catalog_database', '_byom_model_catalog_table',
                           '_byom_model_catalog_license', '_byom_model_catalog_license_source',
-                          'indb_install_location', 'local_storage']:
+                          'indb_install_location', 'local_storage', 'stored_procedure_install_location']:
                 if not isinstance(value, str):
                     raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
                                                                    'str'),

teradataml/options/display.py CHANGED Viewed

@@ -58,7 +58,7 @@ class _Display(_DisplaySuper):
                  byte_encoding = 'base16',
                  print_sqlmr_query = False,
                  blob_length=10,
-                 suppress_vantage_runtime_warnings=False,
+                 suppress_vantage_runtime_warnings=True,
                  geometry_column_length=30):
         """
         PARAMETERS:
@@ -111,7 +111,7 @@ class _Display(_DisplaySuper):
                 Specifies whether to display the warnings raised by the Vantage or not.
                 When set to True, warnings raised by Vantage are not displayed.
                 Otherwise, warnings are displayed.
-                Default Value: False
+                Default Value: True
                 Types: bool
                 Example:
                     display.suppress_vantage_runtime_warnings = True

teradataml/plot/axis.py CHANGED Viewed

@@ -951,7 +951,7 @@ class Axis:
             # Execute the node and create the table in Vantage.
             if self.__y_axis_data[0]._parent_df._table_name is None:
                 # Assuming all the columns are from same DataFrame.
-                repr(self.__y_axis_data[0]._parent_df)
+                self.__y_axis_data[0]._parent_df.materialize()
             series = TDSeries(data=_df if self.ignore_nulls else self.__x_axis_data[0]._parent_df,
                               id=self.__series_identifier,
@@ -994,7 +994,7 @@ class Axis:
         # Remove null values from DataFrame
         if self.ignore_nulls:
             _df = _df.dropna()
-        repr(_df)
+        _df.materialize()
         series = TDSeries(data=_df,
                           id="id",
                           row_index="x",
@@ -1049,7 +1049,7 @@ class Axis:
             # Execute the node and create the table/view in Vantage.
             if self.__y_axis_data[0]._parent_df._table_name is None:
-                repr(self.__y_axis_data[0]._parent_df)
+                self.__y_axis_data[0]._parent_df.materialize()
             matrix = TDMatrix(data=_df if self.ignore_nulls else self.__x_axis_data[0]._parent_df,
                               id=self.__series_identifier,
@@ -1077,7 +1077,7 @@ class Axis:
         # Remove null values from DataFrame
         if self.ignore_nulls:
             _df = _df.dropna()
-        repr(_df)
+        _df.materialize()
         matrix = TDMatrix(data=_df,
                           id="id",
                           row_index="x",

teradataml/scriptmgmt/UserEnv.py CHANGED Viewed

@@ -32,7 +32,7 @@ from teradataml.common.messagecodes import MessageCodes
 from teradataml.common.utils import UtilFuncs
 from teradataml.utils.validators import _Validators
 from urllib.parse import urlparse
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 def _get_ues_url(env_type="users", **kwargs):
@@ -535,6 +535,8 @@ class UserEnv:
         # Check if file exists or not.
         _Validators._validate_file_exists(file_path)
+        # Check if file is empty or not.
+        _Validators._check_empty_file(file_path)
         try:
             # If file size is more than 10 MB, upload the file to cloud and export it to UES.
@@ -681,11 +683,11 @@ class UserEnv:
                        "x-ms-blob-type": CloudProvider.X_MS_BLOB_TYPE.value}
         # Initiate file upload to cloud.
-        response = UtilFuncs._http_request(cloud_storage_url,
-                                           HTTPRequest.PUT,
-                                           data=UtilFuncs._get_file_contents(file_path,
-                                                                             read_in_binary_mode=True),
-                                           headers=headers)
+        with open(file_path, 'rb') as fp:
+            response = UtilFuncs._http_request(cloud_storage_url,
+                                               HTTPRequest.PUT,
+                                               data=fp,
+                                               headers=headers)
         # Since the API is not for UES, it is better to validate and raise error separately.
         if not (200 <= response.status_code < 300):
@@ -3521,6 +3523,8 @@ class UserEnv:
             user environment created in Vantage Languages Ecosystem. If
             model with same name already exists in the remote user
             environment, error is thrown.
+            Note:
+                Maximum size of the model should be less than or equal to 5GB.
         PARAMETERS:
             model_path:

teradataml/scriptmgmt/lls_utils.py CHANGED Viewed

@@ -38,7 +38,7 @@ import warnings
 import webbrowser
 from urllib.parse import parse_qs, urlparse
 from teradataml.utils.utils import _async_run_id_info
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 @collect_queryband(queryband="LstBsEnv")
@@ -1121,7 +1121,8 @@ def get_env(env_name):
         # Get environments created by the current logged in user.
         user_envs_df = list_user_envs()
-        if env_name not in user_envs_df.env_name.values:
+        if (user_envs_df is None or
+                (not user_envs_df.empty and env_name not in user_envs_df.env_name.values)):
             msg_code = MessageCodes.FUNC_EXECUTION_FAILED
             error_msg = Messages.get_message(msg_code, "get_env()", "User environment '{}' not found."
                                                                     " Use 'create_env()' function to create"

teradataml/table_operators/Script.py CHANGED Viewed

@@ -431,8 +431,8 @@ class Script(TableOperator):
             from teradataml import list_td_reserved_keywords
             if get_connection():
                 # Checking for reserved keywords and raising error if present.
-                for column_name in self.returns:
-                    list_td_reserved_keywords(key=column_name, raise_error=True)
+                columns = self.returns
+                list_td_reserved_keywords(key=columns, raise_error=True)
     def __validate(self):
         """

teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

Potentially problematic release.

teradataml 20.0.0.1-py3-none-any.whl → 20.0.0.2-py3-none-any.whl