teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of teradataml has been flagged as potentially problematic; see the registry page for details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +183 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +2 -2
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +275 -40
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +17 -21
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1553 -319
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +276 -319
- teradataml/automl/data_transformation.py +163 -81
- teradataml/automl/feature_engineering.py +402 -239
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +48 -51
- teradataml/automl/model_training.py +291 -189
- teradataml/catalog/byom.py +8 -8
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +48 -6
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +156 -120
- teradataml/common/messagecodes.py +6 -1
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +103 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +121 -31
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/glm_example.json +28 -1
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +21 -2
- teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
- teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
- teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
- teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +97 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +9 -4
- teradataml/dataframe/data_transfer.py +125 -64
- teradataml/dataframe/dataframe.py +575 -57
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +273 -90
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +740 -18
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +324 -18
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
- teradataml/options/__init__.py +16 -5
- teradataml/options/configure.py +39 -6
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +26 -19
- teradataml/scriptmgmt/lls_utils.py +120 -16
- teradataml/table_operators/Script.py +4 -5
- teradataml/table_operators/TableOperator.py +160 -26
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +41 -3
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0

teradataml/table_operators/TableOperator.py
CHANGED

@@ -16,7 +16,7 @@ import os
 import time
 import uuid
 from math import floor
-import
+import warnings
 import subprocess
 from pathlib import Path
 import teradataml.dataframe as tdmldf
@@ -1012,16 +1012,24 @@ class TableOperator:
         repr_string = "{}\n\n{}".format(repr_string, self.result)
         return repr_string

-    def deploy(self, model_column, partition_columns=None, model_file_prefix=None
+    def deploy(self, model_column, partition_columns=None, model_file_prefix=None, retry=3,
+               retry_timeout=30):
         """
         DESCRIPTION:
-            Function deploys the
-            environment in
+            Function deploys the models generated after running `execute_script()` in database in
+            VantageCloud Enterprise or in user environment in VantageCloud Lake.
+            If deployed files are not needed, these files can be removed using `remove_file()` in
+            database or `UserEnv.remove_file()` in lake.
+
+            Note:
+                If the models (one or many) fail to get deployed in Vantage even after retries,
+                try deploying them again using `install_file()` function or remove installed
+                files using `remove_file()` function.

         PARAMETERS:
             model_column:
                 Required Argument.
-                Specifies the column name in which
+                Specifies the column name in which models are present.
                 Supported types of model in this column are CLOB and BLOB.
                 Note:
                     The column mentioned in this argument should be present in
@@ -1049,13 +1057,31 @@ class TableOperator:
                 with underscore(_) to generate model file names.
                 Types: str

+            retry:
+                Optional Argument.
+                Specifies the maximum number of retries to be made to deploy the models.
+                This argument helps in retrying the deployment of models in case of network issues.
+                This argument should be a positive integer.
+                Default Value: 3
+                Types: int
+
+            retry_timeout:
+                Optional Argument. Used along with retry argument. Ignored otherwise.
+                Specifies the time interval in seconds between each retry.
+                This argument should be a positive integer.
+                Default Value: 30
+                Types: int
+
         RETURNS:
-            List of generated file names.
+            List of generated file identifiers in database or file names in lake.

         RAISES:
-            TeradatamlException
+            - TeradatamlException
+            - Throws warning when models failed to deploy even after retries.

         EXAMPLES:
+            >>> import teradataml
+            >>> from teradataml import load_example_data
             >>> load_example_data("openml", "multi_model_classification")

             >>> df = DataFrame("multi_model_classification")
@@ -1073,12 +1099,16 @@ class TableOperator:
             -0.615226 -0.546472 0.017496 -0.488720 0 12 0 10
             0.579671 -0.573365 0.160603 0.014404 0 9 1 10

+            ## Run in VantageCloud Enterprise using Script object.
             # Install Script file.
             >>> file_location = os.path.join(os.path.dirname(teradataml.__file__), "data", "scripts", "deploy_script.py")
             >>> install_file("deploy_script", file_location, replace=True)

+            >>> execute_sql("SET SESSION SEARCHUIFDBPATH = <db_name>;")
+
             # Variables needed for Script execution.
-            >>>
+            >>> from teradataml import configure
+            >>> script_command = f'{configure.indb_install_location} ./<db_name>/deploy_script.py enterprise'
             >>> partition_columns = ["partition_column_1", "partition_column_2"]
             >>> columns = ["col1", "col2", "col3", "col4", "label",
                            "partition_column_1", "partition_column_2"]
@@ -1104,10 +1134,10 @@ class TableOperator:
             # is auto generated.
             >>> obj.deploy(model_column="model",
                            partition_columns=["partition_column_1", "partition_column_2"])
- [4 removed lines not shown]
+            ['model_file_1710436227163427__0_10',
+             'model_file_1710436227163427__1_10',
+             'model_file_1710436227163427__0_11',
+             'model_file_1710436227163427__1_11']

             # Example 2: Provide only "model_file_prefix" argument. Here, filenames are suffixed
             # with 1, 2, 3, ... for multiple models.
@@ -1131,15 +1161,74 @@ class TableOperator:
             'my_prefix_new__0_11',
             'my_prefix_new__1_10',
             'my_prefix_new__1_11']
-
+
+            # Example 5: Assuming that 2 model files fail to get installed due to network issues,
+            #            the function retries installing the failed files twice with timeout between
+            #            retries of 10 secs.
+            >>> opt = obj.deploy(model_column="model", model_file_prefix="my_prefix_",
+                                 partition_columns=["partition_column_1", "partition_column_2"],
+                                 retry=2, retry_timeout=10)
+            RuntimeWarning: The following model files failed to get installed in Vantage:
+            ['my_prefix__1_10', 'my_prefix__1_11'].
+            Try manually deploying them from the path '<temp_path>' using:
+             - `install_file()` when connected to Enterprise/On-Prem system or
+             - `UserEnv.install_file()` when connected to Lake system.
+            OR
+            Remove the returned installed files manually using `remove_file()` or `UserEnv.remove_file()`.
+            >>> opt
+            ['my_prefix__0_10',
+             'my_prefix__0_11']
+
+            ## Run in VantageCloud Lake using Apply object.
+            # Let's assume a user environment named "user_env" already exists in VantageCloud Lake,
+            # which will be used for the examples below.
+
+            # The Apply table operator returns BLOB type for the model column as per deploy_script.py.
+            >>> returns = OrderedDict([("partition_column_1", INTEGER()),
+                                       ("partition_column_2", INTEGER()),
+                                       ("model", BLOB())])
+
+            # Install the script file which returns model and partition columns.
+            >>> user_env.install_file(file_location)
+
+            >>> script_command = 'python3 deploy_script.py lake'
+            >>> obj = Apply(data=df.select(columns),
+                            script_command=script_command,
+                            data_partition_column=partition_columns,
+                            returns=returns,
+                            env_name="user_env"
+                            )
+
+            >>> opt = obj.execute_script()
+            >>> opt
+            partition_column_1  partition_column_2  model
+            0                   10                  b'gAejc1.....drIr'
+            0                   11                  b'gANjcw.....qWIu'
+            1                   10                  b'abdwcd.....dWIz'
+            1                   11                  b'gA4jc4.....agfu'
+
+            # Example 6: Provide both "partition_columns" and "model_file_prefix" arguments.
+            >>> obj.deploy(model_column="model", model_file_prefix="my_prefix_",
+                           partition_columns=["partition_column_1", "partition_column_2"])
+            ['my_prefix__0_10',
+             'my_prefix__0_11',
+             'my_prefix__1_10',
+             'my_prefix__1_11']
+
+            # Other examples are similar to the examples provided for VantageCloud Enterprise.
         """

         arg_info_matrix = []
         arg_info_matrix.append(["model_column", model_column, False, (str)])
         arg_info_matrix.append(["partition_columns", partition_columns, True, (str, list)])
         arg_info_matrix.append(["model_file_prefix", model_file_prefix, True, (str)])
+        arg_info_matrix.append(["retry", retry, True, (int)])
+        arg_info_matrix.append(["retry_timeout", retry_timeout, True, (int)])
         _Validators._validate_function_arguments(arg_info_matrix)

+        _Validators._validate_positive_int(retry, "retry", lbound_inclusive=True)
+        _Validators._validate_positive_int(retry_timeout, "retry_timeout", lbound_inclusive=True)
+
         if self.result is None:
             return "Result is empty. Please run execute_script first."
@@ -1167,7 +1256,28 @@ class TableOperator:
         model_column_type = data._td_column_names_and_sqlalchemy_types[model_column.lower()].__class__.__name__

         n_models = len(vals)
-
+
+        # Default location for .teradataml is user's home directory if configure.local_storage is not set.
+        tempdir = GarbageCollector._get_temp_dir_name()
+
+        def __install_file(model_file, model_file_path):
+            """
+            Function to install the model file in Vantage and return the status.
+            """
+            file_installed = True
+            try:
+                if self.__class__.__name__ == "Script":
+                    from teradataml.dbutils.filemgr import install_file
+                    install_file(file_identifier=model_file, file_path=model_file_path,
+                                 is_binary=True, suppress_output=True, replace=True)
+                elif self.__class__.__name__ == "Apply":
+                    self.env.install_file(file_path=model_file_path, suppress_output=True, replace=True)
+            except Exception as e:
+                file_installed = False
+            return file_installed
+
+        installed_files = []
+        failed_files = []

         for i, row in enumerate(vals):
             model = row[0]
@@ -1178,7 +1288,7 @@ class TableOperator:
                 partition_values = str(i+1)

             model_file = f"{model_file_prefix}_{partition_values}"
-            model_file_path = os.path.join(
+            model_file_path = os.path.join(tempdir, model_file)

             if model_column_type == "CLOB":
                 import base64
@@ -1193,15 +1303,39 @@ class TableOperator:
             with open(model_file_path, "wb") as f:
                 f.write(model)

-
-            from teradataml import install_file
-            install_file(file_identifier=model_file, file_path=model_file_path,
-                         is_binary=True, suppress_output=True)
-            elif self.__class__.__name__ == "Apply":
-                self.env.install_file(file_name=model_file_path)
-
-            all_files.append(model_file)
+            file_installed = __install_file(model_file, model_file_path)

- [3 removed lines not shown]
+            if file_installed:
+                installed_files.append(model_file)
+                os.remove(model_file_path)
+            else:
+                # File failed to get installed in Vantage. Hence, keeping the file in tempdir.
+                failed_files.append(model_file)
+
+        while retry and failed_files:
+            # If there are any failed files and retry is not zero, retry installing the failed files.
+            time.sleep(retry_timeout)
+            retry_failed_files = []
+            for model_file in failed_files:
+                model_file_path = os.path.join(tempdir, model_file)
+                file_installed = __install_file(model_file, model_file_path)
+
+                if file_installed:
+                    installed_files.append(model_file)
+                    os.remove(model_file_path)
+                else:
+                    # File failed to get installed in Vantage. Hence, keeping the file in tempdir.
+                    retry_failed_files.append(model_file)
+            failed_files = retry_failed_files
+            retry -= 1
+
+        if failed_files:
+            failed_files.sort()
+            warning_message = "The following model files failed to get installed in Vantage:\n" + str(failed_files) + ".\n"
+            warning_message += "Try manually deploying them from the path '" + tempdir + "' using:\n"
+            warning_message += " - `install_file()` when connected to Enterprise/On-Prem system or\n"
+            warning_message += " - `UserEnv.install_file()` when connected to Lake system.\n"
+            warning_message += "OR\nRemove the returned installed files manually using `remove_file()` or `UserEnv.remove_file()`."
+            warnings.warn(RuntimeWarning(warning_message))
+
+        return installed_files
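The retry logic added to `deploy()` above is a generic install-with-retries pattern: attempt everything once, re-attempt only the failures with a sleep between rounds, and finally warn about whatever still failed. A minimal, library-free sketch of the same pattern (the helper name and `install_one` callback are illustrative, not part of teradataml):

    import time
    import warnings

    def install_with_retries(files, install_one, retry=3, retry_timeout=30):
        # First pass: attempt every file once.
        installed, failed = [], []
        for f in files:
            if install_one(f):
                installed.append(f)
            else:
                failed.append(f)

        # Retry rounds: sleep, then re-attempt only the failures.
        while retry and failed:
            time.sleep(retry_timeout)
            still_failed = []
            for f in failed:
                if install_one(f):
                    installed.append(f)
                else:
                    still_failed.append(f)
            failed = still_failed
            retry -= 1

        # Surface anything that never succeeded, as deploy() does.
        if failed:
            warnings.warn(RuntimeWarning("Failed to install: {}".format(sorted(failed))))
        return installed

Note the design choice mirrored from the diff: successfully installed files are deleted from the temp directory immediately, while failures stay on disk so a later manual `install_file()` can pick them up.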
teradataml/table_operators/table_operator_util.py
CHANGED

@@ -9,8 +9,7 @@
 # Description: Utilities for Table Operators.                    #
 #                                                                #
 # ##################################################################
-
-import os
+import os, json
 import teradataml.dataframe as tdmldf
 from teradataml.common.constants import TableOperatorConstants, \
     TeradataConstants, OutputStyle
@@ -24,7 +23,7 @@ from teradataml.scriptmgmt.lls_utils import get_env
 from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
 from functools import partial
-from inspect import isfunction
+from inspect import isfunction, getsource


 class _TableOperatorUtils:
@@ -281,12 +280,19 @@ class _TableOperatorUtils:
            self.__validate()
        """
        # Validate the user defined function.
- [6 removed lines not shown]
+
+        if self.operation == TableOperatorConstants.UDF_OP.value:
+            for udf_function in self.user_function:
+                if not isfunction(udf_function):
+                    raise TypeError(Messages.get_message(
+                        MessageCodes.UNSUPPORTED_DATATYPE, 'user_function', "'function'"))
+        else:
+            if not (isfunction(self.user_function) or
+                    isinstance(self.user_function, partial)):
+                raise TypeError(Messages.get_message(
+                    MessageCodes.UNSUPPORTED_DATATYPE, 'user_function',
+                    "'function' or 'functools.partial'")
+                )

        if arg_info_matrix is None:
            arg_info_matrix = []
@@ -349,37 +355,73 @@ class _TableOperatorUtils:
                os.path.dirname(os.path.abspath(__file__))),
                "table_operators",
                "templates")
- [3 removed lines not shown]
+        # Get the template.
+        template = {TableOperatorConstants.APPLY_OP.value: TableOperatorConstants.APPLY_TEMPLATE.value,
+                    TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value}
+        template_name = template.get(self.operation, TableOperatorConstants.MAP_TEMPLATE.value)
        # Write to the script based on the template.
        try:
            with open(os.path.join(template_dir, template_name), 'r') as input_file:
                with open(self.script_path, 'w') as output_file:
- [22 removed lines not shown]
+                    if self.operation == TableOperatorConstants.UDF_OP.value:
+
+                        # The function may carry the udf decorator; remove it.
+                        # With the decorator notation
+                        #     @udf
+                        #     def to_upper(s):
+                        #         return s.upper()
+                        # the source code includes the decorator line,
+                        # but with the call notation
+                        #     f = udf(to_upper)
+                        # the source code does not contain udf.
+                        # So, remove the first line when the decorator notation is used.
+                        # For both notations, strip any leading whitespace from the function definition.
+                        # If multiple UDFs are present, append them as a single string.
+
+                        user_function_code = ""
+                        for udf_code in self.user_function:
+                            udf_code = getsource(udf_code)
+                            udf_code = udf_code.lstrip()
+                            if udf_code.startswith("@"):
+                                udf_code = udf_code[udf_code.find("\n")+1: ].lstrip()
+                            user_function_code += udf_code + '\n'
+
+                        output_file.write(input_file.read().format(
+                            DELIMITER=self.delimiter,
+                            QUOTECHAR=self.quotechar,
+                            FUNCTION_DEFINITION=user_function_code,
+                            FUNCTION_ARGS=str(self.function_args),
+                            INPUT_COLUMNS=json.dumps(self.data.columns),
+                            OUTPUT_COLUMNS=json.dumps(list(self.returns.keys())),
+                            COLUMNS_DEFINITIONS=json.dumps(self.columns_definitions),
+                            OUTPUT_TYPE_CONVERTERS=json.dumps(self.output_type_converters)
+                        ))
+                    else:
+                        # Prepare the script file from the template file for map_row and map_partition.
+                        output_file.write(
+                            input_file.read().format(
+                                DELIMITER=UtilFuncs._serialize_and_encode(
+                                    self.delimiter),
+                                STO_OPERATION=UtilFuncs._serialize_and_encode(
+                                    self.operation),
+                                USER_DEF_FUNC=UtilFuncs._serialize_and_encode(
+                                    self.user_function),
+                                DF_COL_NAMES_LIST=UtilFuncs._serialize_and_encode(
+                                    self.data.columns),
+                                DF_COL_TYPES_LIST=UtilFuncs._serialize_and_encode(
+                                    python_input_col_types),
+                                OUTPUT_COL_NAMES_LIST=UtilFuncs._serialize_and_encode(
+                                    list(self.returns.keys())),
+                                OUTPUT_CONVERTERS=UtilFuncs._serialize_and_encode(
+                                    output_converters),
+                                QUOTECHAR=UtilFuncs._serialize_and_encode(
+                                    self.quotechar),
+                                INPUT_CONVERTERS=UtilFuncs._serialize_and_encode(
+                                    input_converters),
+                                CHUNK_SIZE=UtilFuncs._serialize_and_encode(
+                                    self.chunk_size)
+                            )
+                        )
                )
-                )
        except Exception:
            # We may end up here if the formatting of the templating to create
            # the user script fails.
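The decorator-stripping logic above relies on `inspect.getsource()` returning the decorator line when the `@udf` notation is used. A small self-contained sketch of that behavior (with a stand-in `udf` decorator, since this snippet does not import teradataml):

    import inspect

    def udf(func):
        # Stand-in pass-through decorator for illustration only.
        return func

    @udf
    def to_upper(s):
        return s.upper()

    src = inspect.getsource(to_upper).lstrip()
    # getsource() includes the "@udf" line for the decorator notation,
    # so drop everything up to the first newline before embedding the
    # function body in the generated script.
    if src.startswith("@"):
        src = src[src.find("\n") + 1:].lstrip()
    print(src)
    # def to_upper(s):
    #     return s.upper()

With the `f = udf(to_upper)` notation there is no decorator line in the source, so the `startswith("@")` check simply does nothing.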
@@ -410,9 +452,11 @@ class _TableOperatorUtils:
        """
        try:
            if self.operation in [TableOperatorConstants.MAP_ROW_OP.value,
-                                 TableOperatorConstants.MAP_PARTITION_OP.value]
+                                 TableOperatorConstants.MAP_PARTITION_OP.value] or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'IN-DB'):
                return self.__execute_script_table_operator()
-            elif self.operation == TableOperatorConstants.APPLY_OP.value
+            elif self.operation == TableOperatorConstants.APPLY_OP.value or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'REMOTE'):
                return self.__execute_apply_table_operator()
        except Exception:
            raise
@@ -572,8 +616,9 @@ class _TableOperatorUtils:
        if self.exec_mode.upper() == TableOperatorConstants.REMOTE_EXEC.value:
            # If not test mode, execute the script using Apply table operator.
            try:
-                # If APPLY, get environment and use it for installing file.
-                if self.operation
+                # If APPLY or UDF, get environment and use it for installing file.
+                if self.operation in [TableOperatorConstants.APPLY_OP.value,
+                                      TableOperatorConstants.UDF_OP.value]:
                    self.__env.install_file(self.script_path, suppress_output=True)

                # Execute the script.
@@ -617,13 +662,15 @@ class _TableOperatorUtils:
                suppress_output=True)

            # For apply, remove file from remote user environment.
-            if self.operation == TableOperatorConstants.APPLY_OP.value
+            if self.operation == TableOperatorConstants.APPLY_OP.value or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'REMOTE'):
                self.__env.remove_file(self.script_name, suppress_output=True)

            # Remove the entry from Garbage Collector
            if self.operation in [TableOperatorConstants.MAP_ROW_OP.value,
                                  TableOperatorConstants.MAP_PARTITION_OP.value,
-                                  TableOperatorConstants.APPLY_OP.value
+                                  TableOperatorConstants.APPLY_OP.value,
+                                  TableOperatorConstants.UDF_OP.value]:
                GarbageCollector._delete_object_entry(
                    object_to_delete=self.script_entry,
                    object_type=TeradataConstants.TERADATA_SCRIPT,
teradataml/table_operators/templates/dataframe_udf.template
ADDED

@@ -0,0 +1,63 @@
+import sys, csv
+import datetime
+
+td_buffer = {{}}
+
+
+{FUNCTION_DEFINITION}
+
+function_args = {FUNCTION_ARGS}
+# Information that is required to help with the script usage.
+# The delimiter to use with the input and output text.
+delimiter = "{DELIMITER}"
+# The names of columns in the input teradataml DataFrame.
+_input_columns = {INPUT_COLUMNS}
+# The names of columns in the output teradataml DataFrame.
+_output_columns = {OUTPUT_COLUMNS}
+# The definition for new columns in output.
+columns_definitions = {COLUMNS_DEFINITIONS}
+# The types of columns in the input/output teradataml DataFrame.
+output_type_converters = {OUTPUT_TYPE_CONVERTERS}
+for k,v in output_type_converters.items():
+    if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
+        output_type_converters[k] = 'str'
+output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
+# The quotechar to use.
+quotechar = "{QUOTECHAR}"
+if quotechar == "None":
+    quotechar = None
+
+
+# The entry point to the script.
+if __name__ == "__main__":
+
+    records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
+    for record in records:
+        record = dict(zip(_input_columns, record))
+        out_rec = []
+        for column in _output_columns:
+
+            # If it is a new column, get the value from definition.
+            if column in columns_definitions:
+                f_args = tuple()
+                # Convert the argument types first.
+                for v in function_args[column]:
+                    if v in _input_columns:
+                        c_type_ = output_type_converters.get(v)
+                        if record[v]:
+                            # If it is a float, strip any embedded spaces first.
+                            if c_type_.__name__ == 'float':
+                                arg = output_type_converters.get(v)(record[v].replace(' ', ''))
+                            else:
+                                arg = output_type_converters.get(v)(record[v])
+                        else:
+                            arg = record[v]
+                    else:
+                        arg = v
+                    f_args = f_args + (arg, )
+                func_ = globals()[columns_definitions[column]]
+                out_rec.append(output_type_converters[column](func_(*f_args)))
+            else:
+                out_rec.append(record[column])
+
+        print("{{}}".format(delimiter).join((str(i) for i in out_rec)))
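To make the template's data flow concrete, here is a stand-alone rendering of the same row-processing loop with the `{PLACEHOLDER}` fields filled in by hand; the column names, function, and sample rows are made up for illustration:

    import csv
    import io

    # Hand-filled stand-ins for the template placeholders above.
    def to_upper(s):
        return s.upper()

    delimiter = ","
    _input_columns = ["id", "name"]
    _output_columns = ["id", "upper_name"]
    columns_definitions = {"upper_name": "to_upper"}   # new column -> function name
    function_args = {"upper_name": ("name",)}          # per-column argument spec
    output_type_converters = {"id": str, "name": str, "upper_name": str}

    stdin = io.StringIO("1,alice\n2,bob\n")            # stands in for sys.stdin
    for record in csv.reader(stdin, delimiter=delimiter):
        record = dict(zip(_input_columns, record))
        out_rec = []
        for column in _output_columns:
            if column in columns_definitions:
                # Arguments are input-column values when the name matches,
                # literal values otherwise (simplified from the template).
                f_args = tuple(record.get(v, v) for v in function_args[column])
                func_ = globals()[columns_definitions[column]]
                out_rec.append(output_type_converters[column](func_(*f_args)))
            else:
                out_rec.append(record[column])
        print(delimiter.join(str(i) for i in out_rec))
    # 1,ALICE
    # 2,BOB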
teradataml/telemetry_utils/__init__.py
File without changes
teradataml/telemetry_utils/queryband.py
ADDED

@@ -0,0 +1,52 @@
+from functools import wraps
+from teradatasqlalchemy.telemetry.queryband import QueryBand, collect_queryband as tdsqlalchemy_collect_queryband
+
+
+# Create a global variable to manage querybands for teradataml package.
+global session_queryband
+session_queryband = QueryBand()
+
+
+def collect_queryband(*qb_deco_pos_args, **qb_deco_kwargs):
+    """
+    DESCRIPTION:
+        Decorator for calling collect_queryband decorator in telemetry utility
+        in teradatasqlalchemy using session_queryband object and other positional
+        and keyword arguments expected by collect_queryband.
+
+    PARAMETERS:
+        qb_deco_pos_args:
+            Optional Argument.
+            Specifies the positional arguments accepted by collect_queryband
+            decorator in telemetry utility in teradatasqlalchemy.
+
+        qb_deco_kwargs:
+            Optional Argument.
+            Specifies the keyword arguments accepted by collect_queryband
+            decorator in telemetry utility in teradatasqlalchemy.
+
+    EXAMPLES:
+        >>> from teradataml.telemetry_utils.queryband import collect_queryband
+        # Example 1: Collect queryband for a standalone function.
+        @collect_queryband(queryband="CreateContext")
+        def create_context(host = None, username ...): ...
+
+        # Example 2: Collect queryband for a class method and use a
+        # class attribute to retrieve the queryband string.
+        @collect_queryband(attr="func_name")
+        def _execute_query(self, persist=False, volatile=False): ...
+
+        # Example 3: Collect queryband for a class method and use a
+        # method of the same class to retrieve the queryband string.
+        @collect_queryband(method="get_class_specific_queryband")
+        def _execute_query(self, persist=False, volatile=False): ...
+    """
+    def outer_wrapper(func):
+        @wraps(func)
+        def inner_wrapper(*func_args, **func_kwargs):
+            # Pass the required argument 'session_queryband' along with other
+            # expected arguments to collect_queryband() decorator which is
+            # imported as tdsqlalchemy_collect_queryband.
+            return tdsqlalchemy_collect_queryband(session_queryband, *qb_deco_pos_args, **qb_deco_kwargs)(func)(*func_args, **func_kwargs)
+        return inner_wrapper
+    return outer_wrapper
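queryband.py is an instance of a general pattern: a parameterized decorator that injects one extra, module-level object into another parameterized decorator before delegating to it. A library-free sketch of the pattern, with made-up names standing in for `QueryBand` and `tdsqlalchemy_collect_queryband`:

    from functools import wraps

    shared_state = []                     # plays the role of session_queryband

    def target_deco(state, label):
        # Stands in for tdsqlalchemy_collect_queryband(session_queryband, ...).
        def outer(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                state.append(label)       # record the queryband-like tag
                return func(*args, **kwargs)
            return wrapper
        return outer

    def forwarding_deco(*deco_args, **deco_kwargs):
        def outer_wrapper(func):
            @wraps(func)
            def inner_wrapper(*args, **kwargs):
                # Inject the shared object, then delegate to the target decorator.
                return target_deco(shared_state, *deco_args, **deco_kwargs)(func)(*args, **kwargs)
            return inner_wrapper
        return outer_wrapper

    @forwarding_deco("CreateContext")
    def create_context():
        return "ok"

    create_context()
    print(shared_state)                   # ['CreateContext']

Rebuilding the target decorator on every call, as both the sketch and queryband.py do, keeps the forwarding layer stateless; the shared object carries all session state.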
teradataml/utils/validators.py
CHANGED
@@ -170,7 +170,7 @@ class _Validators:
                Required Argument.
                Specifies the name or list of names of columns to be validated
                for existence.
-                Types: str or List of strings
+                Types: str or List of strings or ColumnExpression or list of ColumnExpression

            arg_name:
                Required Argument.
@@ -204,7 +204,15 @@ class _Validators:
        df_columns = UtilFuncs._all_df_columns(column_expression)

        # Let's validate existence of each column one by one.
-
+        columns_ = []
+        for column in columns:
+            if isinstance(column, str):
+                columns_.append(column)
+            else:
+                columns_ = columns_ + UtilFuncs._all_df_columns(column)
+
+        # Let's validate existence of each column one by one.
+        for column_name in columns_:
            # If column name does not exist in DataFrame of a column, raise the exception.
            if column_name not in df_columns:
                message = "{}. Check the argument '{}'".format(sorted(df_columns), arg_name)
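The new normalization step accepts a mix of plain strings and column-expression objects and flattens everything to column names before the existence check. In isolation, with a stand-in class playing the role of teradataml's ColumnExpression:

    class FakeColumnExpression:
        # Stand-in: a real ColumnExpression resolves to one or more column
        # names via UtilFuncs._all_df_columns().
        def __init__(self, *names):
            self.names = list(names)

    def _all_df_columns(expr):
        return expr.names

    def flatten_columns(columns):
        columns_ = []
        for column in columns:
            if isinstance(column, str):
                columns_.append(column)
            else:
                columns_ = columns_ + _all_df_columns(column)
        return columns_

    print(flatten_columns(["a", FakeColumnExpression("b", "c"), "d"]))
    # ['a', 'b', 'c', 'd']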
@@ -1652,7 +1660,7 @@ class _Validators:

        # Check whether table exists on the system or not.
        table_exists = conn.dialect.has_table(conn, table_name=table_name,
-                                              schema=schema_name)
+                                              schema=schema_name, table_only=True)

        # If table exists, return True.
        if table_exists:
@@ -2237,3 +2245,33 @@ class _Validators:
            raise TeradataMlException(message,
                                      MessageCodes.IMPORT_PYTHON_PACKAGE)
        return True
+
+
+    @staticmethod
+    @skip_validation()
+    def _validate_ipaddress(ip_address):
+        """
+        DESCRIPTION:
+            Check if ipaddress is valid.
+        PARAMETERS:
+            ip_address:
+                Required Argument.
+                Specifies the ip address to be validated.
+                Types: str
+        RETURNS:
+            None.
+        RAISES:
+            TeradataMlException
+        EXAMPLES:
+            _Validators._validate_ipaddress("190.132.12.15")
+        """
+        import ipaddress
+
+        try:
+            ipaddress.ip_address(ip_address)
+        except Exception as err:
+            raise ValueError(Messages.get_message(
+                MessageCodes.INVALID_ARG_VALUE).format(ip_address, "ip_address",
+                'of four numbers (each between 0 and 255) separated by periods'))
+
+        return True
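The validator delegates the actual parsing to the standard library's `ipaddress.ip_address()`, which raises `ValueError` for malformed input. A quick demonstration (note that the stdlib function also accepts IPv6 strings, even though the error message above describes only the dotted-quad IPv4 form):

    import ipaddress

    for candidate in ["190.132.12.15", "::1", "999.1.1.1", "not-an-ip"]:
        try:
            ipaddress.ip_address(candidate)
            print(candidate, "-> valid")
        except ValueError:
            print(candidate, "-> invalid")
    # 190.132.12.15 -> valid
    # ::1 -> valid
    # 999.1.1.1 -> invalid
    # not-an-ip -> invalid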