teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of teradataml has been flagged as potentially problematic; see the registry page for details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +183 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +2 -2
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +275 -40
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +17 -21
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1553 -319
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +276 -319
- teradataml/automl/data_transformation.py +163 -81
- teradataml/automl/feature_engineering.py +402 -239
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +48 -51
- teradataml/automl/model_training.py +291 -189
- teradataml/catalog/byom.py +8 -8
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +48 -6
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +156 -120
- teradataml/common/messagecodes.py +6 -1
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +103 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +121 -31
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/glm_example.json +28 -1
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +21 -2
- teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
- teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
- teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
- teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +97 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +9 -4
- teradataml/dataframe/data_transfer.py +125 -64
- teradataml/dataframe/dataframe.py +575 -57
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +273 -90
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +740 -18
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +324 -18
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
- teradataml/options/__init__.py +16 -5
- teradataml/options/configure.py +39 -6
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +26 -19
- teradataml/scriptmgmt/lls_utils.py +120 -16
- teradataml/table_operators/Script.py +4 -5
- teradataml/table_operators/TableOperator.py +160 -26
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +41 -3
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0

teradataml/table_operators/TableOperator.py
CHANGED

@@ -16,7 +16,7 @@ import os
 import time
 import uuid
 from math import floor
-import
+import warnings
 import subprocess
 from pathlib import Path
 import teradataml.dataframe as tdmldf
@@ -1012,16 +1012,24 @@ class TableOperator:
         repr_string = "{}\n\n{}".format(repr_string, self.result)
         return repr_string

-    def deploy(self, model_column, partition_columns=None, model_file_prefix=None
+    def deploy(self, model_column, partition_columns=None, model_file_prefix=None, retry=3,
+               retry_timeout=30):
         """
         DESCRIPTION:
-            Function deploys the
-            environment in
+            Function deploys the models generated after running `execute_script()` in database in
+            VantageCloud Enterprise or in user environment in VantageCloud Lake.
+            If deployed files are not needed, these files can be removed using `remove_file()` in
+            database or `UserEnv.remove_file()` in lake.
+
+            Note:
+                If the models (one or many) fail to get deployed in Vantage even after retries,
+                try deploying them again using `install_file()` function or remove installed
+                files using `remove_file()` function.

         PARAMETERS:
             model_column:
                 Required Argument.
-                Specifies the column name in which
+                Specifies the column name in which models are present.
                 Supported types of model in this column are CLOB and BLOB.
                 Note:
                     The column mentioned in this argument should be present in
@@ -1049,13 +1057,31 @@ class TableOperator:
                 with underscore(_) to generate model file names.
                 Types: str

+            retry:
+                Optional Argument.
+                Specifies the maximum number of retries to be made to deploy the models.
+                This argument helps in retrying the deployment of models in case of network issues.
+                This argument should be a positive integer.
+                Default Value: 3
+                Types: int
+
+            retry_timeout:
+                Optional Argument. Used along with retry argument. Ignored otherwise.
+                Specifies the time interval in seconds between each retry.
+                This argument should be a positive integer.
+                Default Value: 30
+                Types: int
+
         RETURNS:
-            List of generated file names.
+            List of generated file identifiers in database or file names in lake.

         RAISES:
-            TeradatamlException
+            - TeradatamlException
+            - Throws warning when models failed to deploy even after retries.

         EXAMPLES:
+            >>> import teradataml
+            >>> from teradataml import load_example_data
             >>> load_example_data("openml", "multi_model_classification")

             >>> df = DataFrame("multi_model_classification")
@@ -1073,12 +1099,16 @@ class TableOperator:
             -0.615226 -0.546472 0.017496 -0.488720 0 12 0 10
             0.579671 -0.573365 0.160603 0.014404 0 9 1 10

+            ## Run in VantageCloud Enterprise using Script object.
             # Install Script file.
             >>> file_location = os.path.join(os.path.dirname(teradataml.__file__), "data", "scripts", "deploy_script.py")
             >>> install_file("deploy_script", file_location, replace=True)

+            >>> execute_sql("SET SESSION SEARCHUIFDBPATH = <db_name>;")
+
             # Variables needed for Script execution.
-            >>>
+            >>> from teradataml import configure
+            >>> script_command = f'{configure.indb_install_location} ./<db_name>/deploy_script.py enterprise'
             >>> partition_columns = ["partition_column_1", "partition_column_2"]
             >>> columns = ["col1", "col2", "col3", "col4", "label",
                            "partition_column_1", "partition_column_2"]
@@ -1104,10 +1134,10 @@ class TableOperator:
             # is auto generated.
             >>> obj.deploy(model_column="model",
                            partition_columns=["partition_column_1", "partition_column_2"])
- [4 removed lines not shown]
+            ['model_file_1710436227163427__0_10',
+             'model_file_1710436227163427__1_10',
+             'model_file_1710436227163427__0_11',
+             'model_file_1710436227163427__1_11']

             # Example 2: Provide only "model_file_prefix" argument. Here, filenames are suffixed
             # with 1, 2, 3, ... for multiple models.
@@ -1131,15 +1161,74 @@ class TableOperator:
             'my_prefix_new__0_11',
             'my_prefix_new__1_10',
             'my_prefix_new__1_11']
-
+
+            # Example 5: Assuming that 2 model files fail to get installed due to network issues,
+            #            the function retries installing the failed files twice with timeout between
+            #            retries of 10 secs.
+            >>> opt = obj.deploy(model_column="model", model_file_prefix="my_prefix_",
+                                 partition_columns=["partition_column_1", "partition_column_2"],
+                                 retry=2, retry_timeout=10)
+            RuntimeWarning: The following model files failed to get installed in Vantage:
+            ['my_prefix__1_10', 'my_prefix__1_11'].
+            Try manually deploying them from the path '<temp_path>' using:
+             - `install_file()` when connected to Enterprise/On-Prem system or
+             - `UserEnv.install_file()` when connected to Lake system.
+            OR
+            Remove the returned installed files manually using `remove_file()` or `UserEnv.remove_file()`.
+            >>> opt
+            ['my_prefix__0_10',
+             'my_prefix__0_11']
+
+            ## Run in VantageCloud Lake using Apply object.
+            # Let's assume a user environment named "user_env" already exists in VantageCloud Lake,
+            # which will be used for the examples below.
+
+            # The Apply table operator returns BLOB type for the model column as per deploy_script.py.
+            >>> returns = OrderedDict([("partition_column_1", INTEGER()),
+                                       ("partition_column_2", INTEGER()),
+                                       ("model", BLOB())])
+
+            # Install the script file which returns model and partition columns.
+            >>> user_env.install_file(file_location)
+
+            >>> script_command = 'python3 deploy_script.py lake'
+            >>> obj = Apply(data=df.select(columns),
+                            script_command=script_command,
+                            data_partition_column=partition_columns,
+                            returns=returns,
+                            env_name="user_env"
+                            )
+
+            >>> opt = obj.execute_script()
+            >>> opt
+            partition_column_1  partition_column_2  model
+            0                   10                  b'gAejc1.....drIr'
+            0                   11                  b'gANjcw.....qWIu'
+            1                   10                  b'abdwcd.....dWIz'
+            1                   11                  b'gA4jc4.....agfu'
+
+            # Example 6: Provide both "partition_columns" and "model_file_prefix" arguments.
+            >>> obj.deploy(model_column="model", model_file_prefix="my_prefix_",
+                           partition_columns=["partition_column_1", "partition_column_2"])
+            ['my_prefix__0_10',
+             'my_prefix__0_11',
+             'my_prefix__1_10',
+             'my_prefix__1_11']
+
+            # Other examples are similar to the examples provided for VantageCloud Enterprise.
         """

         arg_info_matrix = []
         arg_info_matrix.append(["model_column", model_column, False, (str)])
         arg_info_matrix.append(["partition_columns", partition_columns, True, (str, list)])
         arg_info_matrix.append(["model_file_prefix", model_file_prefix, True, (str)])
+        arg_info_matrix.append(["retry", retry, True, (int)])
+        arg_info_matrix.append(["retry_timeout", retry_timeout, True, (int)])
         _Validators._validate_function_arguments(arg_info_matrix)

+        _Validators._validate_positive_int(retry, "retry", lbound_inclusive=True)
+        _Validators._validate_positive_int(retry_timeout, "retry_timeout", lbound_inclusive=True)
+
         if self.result is None:
             return "Result is empty. Please run execute_script first."
@@ -1167,7 +1256,28 @@ class TableOperator:
         model_column_type = data._td_column_names_and_sqlalchemy_types[model_column.lower()].__class__.__name__

         n_models = len(vals)
-
+
+        # Default location for .teradataml is user's home directory if configure.local_storage is not set.
+        tempdir = GarbageCollector._get_temp_dir_name()
+
+        def __install_file(model_file, model_file_path):
+            """
+            Function to install the model file in Vantage and return the status.
+            """
+            file_installed = True
+            try:
+                if self.__class__.__name__ == "Script":
+                    from teradataml.dbutils.filemgr import install_file
+                    install_file(file_identifier=model_file, file_path=model_file_path,
+                                 is_binary=True, suppress_output=True, replace=True)
+                elif self.__class__.__name__ == "Apply":
+                    self.env.install_file(file_path=model_file_path, suppress_output=True, replace=True)
+            except Exception as e:
+                file_installed = False
+            return file_installed
+
+        installed_files = []
+        failed_files = []

         for i, row in enumerate(vals):
             model = row[0]
@@ -1178,7 +1288,7 @@ class TableOperator:
                 partition_values = str(i+1)

             model_file = f"{model_file_prefix}_{partition_values}"
-            model_file_path = os.path.join(
+            model_file_path = os.path.join(tempdir, model_file)

             if model_column_type == "CLOB":
                 import base64
@@ -1193,15 +1303,39 @@ class TableOperator:
             with open(model_file_path, "wb") as f:
                 f.write(model)

-
-            from teradataml import install_file
-            install_file(file_identifier=model_file, file_path=model_file_path,
-                         is_binary=True, suppress_output=True)
-            elif self.__class__.__name__ == "Apply":
-                self.env.install_file(file_name=model_file_path)
-
-            all_files.append(model_file)
+            file_installed = __install_file(model_file, model_file_path)

- [3 removed lines not shown]
+            if file_installed:
+                installed_files.append(model_file)
+                os.remove(model_file_path)
+            else:
+                # File failed to get installed in Vantage. Hence, keeping the file in tempdir.
+                failed_files.append(model_file)
+
+        while retry and failed_files:
+            # If there are any failed files and retry is not zero, retry installing the failed files.
+            time.sleep(retry_timeout)
+            retry_failed_files = []
+            for model_file in failed_files:
+                model_file_path = os.path.join(tempdir, model_file)
+                file_installed = __install_file(model_file, model_file_path)
+
+                if file_installed:
+                    installed_files.append(model_file)
+                    os.remove(model_file_path)
+                else:
+                    # File failed to get installed in Vantage. Hence, keeping the file in tempdir.
+                    retry_failed_files.append(model_file)
+            failed_files = retry_failed_files
+            retry -= 1
+
+        if failed_files:
+            failed_files.sort()
+            warning_message = "The following model files failed to get installed in Vantage:\n" + str(failed_files) + ".\n"
+            warning_message += "Try manually deploying them from the path '" + tempdir + "' using:\n"
+            warning_message += " - `install_file()` when connected to Enterprise/On-Prem system or\n"
+            warning_message += " - `UserEnv.install_file()` when connected to Lake system.\n"
+            warning_message += "OR\nRemove the returned installed files manually using `remove_file()` or `UserEnv.remove_file()`."
+            warnings.warn(RuntimeWarning(warning_message))
+
+        return installed_files
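The retry logic added to `deploy()` above is a generic install-with-retries pattern: attempt everything once, re-attempt only the failures with a sleep between rounds, and finally warn about whatever still failed. A minimal, library-free sketch of the same pattern (the helper name and `install_one` callback are illustrative, not part of teradataml):

    import time
    import warnings

    def install_with_retries(files, install_one, retry=3, retry_timeout=30):
        # First pass: attempt every file once.
        installed, failed = [], []
        for f in files:
            if install_one(f):
                installed.append(f)
            else:
                failed.append(f)

        # Retry rounds: sleep, then re-attempt only the failures.
        while retry and failed:
            time.sleep(retry_timeout)
            still_failed = []
            for f in failed:
                if install_one(f):
                    installed.append(f)
                else:
                    still_failed.append(f)
            failed = still_failed
            retry -= 1

        # Surface anything that never succeeded, as deploy() does.
        if failed:
            warnings.warn(RuntimeWarning("Failed to install: {}".format(sorted(failed))))
        return installed

Note the design choice mirrored from the diff: successfully installed files are deleted from the temp directory immediately, while failures stay on disk so a later manual `install_file()` can pick them up.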
teradataml/table_operators/table_operator_util.py
CHANGED

@@ -9,8 +9,7 @@
 # Description: Utilities for Table Operators.                    #
 #                                                                #
 # ##################################################################
-
-import os
+import os, json
 import teradataml.dataframe as tdmldf
 from teradataml.common.constants import TableOperatorConstants, \
     TeradataConstants, OutputStyle
@@ -24,7 +23,7 @@ from teradataml.scriptmgmt.lls_utils import get_env
 from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
 from functools import partial
-from inspect import isfunction
+from inspect import isfunction, getsource


 class _TableOperatorUtils:
@@ -281,12 +280,19 @@ class _TableOperatorUtils:
            self.__validate()
        """
        # Validate the user defined function.
- [6 removed lines not shown]
+
+        if self.operation == TableOperatorConstants.UDF_OP.value:
+            for udf_function in self.user_function:
+                if not isfunction(udf_function):
+                    raise TypeError(Messages.get_message(
+                        MessageCodes.UNSUPPORTED_DATATYPE, 'user_function', "'function'"))
+        else:
+            if not (isfunction(self.user_function) or
+                    isinstance(self.user_function, partial)):
+                raise TypeError(Messages.get_message(
+                    MessageCodes.UNSUPPORTED_DATATYPE, 'user_function',
+                    "'function' or 'functools.partial'")
+                )

        if arg_info_matrix is None:
            arg_info_matrix = []
@@ -349,37 +355,73 @@ class _TableOperatorUtils:
                os.path.dirname(os.path.abspath(__file__))),
                "table_operators",
                "templates")
- [3 removed lines not shown]
+        # Get the template.
+        template = {TableOperatorConstants.APPLY_OP.value: TableOperatorConstants.APPLY_TEMPLATE.value,
+                    TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value}
+        template_name = template.get(self.operation, TableOperatorConstants.MAP_TEMPLATE.value)
        # Write to the script based on the template.
        try:
            with open(os.path.join(template_dir, template_name), 'r') as input_file:
                with open(self.script_path, 'w') as output_file:
- [22 removed lines not shown]
+                    if self.operation == TableOperatorConstants.UDF_OP.value:
+
+                        # The function may carry the udf decorator; remove it.
+                        # With the decorator notation
+                        #     @udf
+                        #     def to_upper(s):
+                        #         return s.upper()
+                        # the source code includes the decorator line,
+                        # but with the call notation
+                        #     f = udf(to_upper)
+                        # the source code does not contain udf.
+                        # So, remove the first line when the decorator notation is used.
+                        # For both notations, strip any leading whitespace from the function definition.
+                        # If multiple UDFs are present, append them as a single string.
+
+                        user_function_code = ""
+                        for udf_code in self.user_function:
+                            udf_code = getsource(udf_code)
+                            udf_code = udf_code.lstrip()
+                            if udf_code.startswith("@"):
+                                udf_code = udf_code[udf_code.find("\n")+1: ].lstrip()
+                            user_function_code += udf_code + '\n'
+
+                        output_file.write(input_file.read().format(
+                            DELIMITER=self.delimiter,
+                            QUOTECHAR=self.quotechar,
+                            FUNCTION_DEFINITION=user_function_code,
+                            FUNCTION_ARGS=str(self.function_args),
+                            INPUT_COLUMNS=json.dumps(self.data.columns),
+                            OUTPUT_COLUMNS=json.dumps(list(self.returns.keys())),
+                            COLUMNS_DEFINITIONS=json.dumps(self.columns_definitions),
+                            OUTPUT_TYPE_CONVERTERS=json.dumps(self.output_type_converters)
+                        ))
+                    else:
+                        # Prepare the script file from the template file for map_row and map_partition.
+                        output_file.write(
+                            input_file.read().format(
+                                DELIMITER=UtilFuncs._serialize_and_encode(
+                                    self.delimiter),
+                                STO_OPERATION=UtilFuncs._serialize_and_encode(
+                                    self.operation),
+                                USER_DEF_FUNC=UtilFuncs._serialize_and_encode(
+                                    self.user_function),
+                                DF_COL_NAMES_LIST=UtilFuncs._serialize_and_encode(
+                                    self.data.columns),
+                                DF_COL_TYPES_LIST=UtilFuncs._serialize_and_encode(
+                                    python_input_col_types),
+                                OUTPUT_COL_NAMES_LIST=UtilFuncs._serialize_and_encode(
+                                    list(self.returns.keys())),
+                                OUTPUT_CONVERTERS=UtilFuncs._serialize_and_encode(
+                                    output_converters),
+                                QUOTECHAR=UtilFuncs._serialize_and_encode(
+                                    self.quotechar),
+                                INPUT_CONVERTERS=UtilFuncs._serialize_and_encode(
+                                    input_converters),
+                                CHUNK_SIZE=UtilFuncs._serialize_and_encode(
+                                    self.chunk_size)
+                            )
+                        )
                )
-                )
        except Exception:
            # We may end up here if the formatting of the templating to create
            # the user script fails.
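The decorator-stripping logic above relies on `inspect.getsource()` returning the decorator line when the `@udf` notation is used. A small self-contained sketch of that behavior (with a stand-in `udf` decorator, since this snippet does not import teradataml):

    import inspect

    def udf(func):
        # Stand-in pass-through decorator for illustration only.
        return func

    @udf
    def to_upper(s):
        return s.upper()

    src = inspect.getsource(to_upper).lstrip()
    # getsource() includes the "@udf" line for the decorator notation,
    # so drop everything up to the first newline before embedding the
    # function body in the generated script.
    if src.startswith("@"):
        src = src[src.find("\n") + 1:].lstrip()
    print(src)
    # def to_upper(s):
    #     return s.upper()

With the `f = udf(to_upper)` notation there is no decorator line in the source, so the `startswith("@")` check simply does nothing.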
@@ -410,9 +452,11 @@ class _TableOperatorUtils:
        """
        try:
            if self.operation in [TableOperatorConstants.MAP_ROW_OP.value,
-                                 TableOperatorConstants.MAP_PARTITION_OP.value]
+                                 TableOperatorConstants.MAP_PARTITION_OP.value] or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'IN-DB'):
                return self.__execute_script_table_operator()
-            elif self.operation == TableOperatorConstants.APPLY_OP.value
+            elif self.operation == TableOperatorConstants.APPLY_OP.value or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'REMOTE'):
                return self.__execute_apply_table_operator()
        except Exception:
            raise
@@ -572,8 +616,9 @@ class _TableOperatorUtils:
        if self.exec_mode.upper() == TableOperatorConstants.REMOTE_EXEC.value:
            # If not test mode, execute the script using Apply table operator.
            try:
-                # If APPLY, get environment and use it for installing file.
-                if self.operation
+                # If APPLY or UDF, get environment and use it for installing file.
+                if self.operation in [TableOperatorConstants.APPLY_OP.value,
+                                      TableOperatorConstants.UDF_OP.value]:
                    self.__env.install_file(self.script_path, suppress_output=True)

                # Execute the script.
@@ -617,13 +662,15 @@ class _TableOperatorUtils:
                suppress_output=True)

            # For apply, remove file from remote user environment.
-            if self.operation == TableOperatorConstants.APPLY_OP.value
+            if self.operation == TableOperatorConstants.APPLY_OP.value or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'REMOTE'):
                self.__env.remove_file(self.script_name, suppress_output=True)

            # Remove the entry from Garbage Collector
            if self.operation in [TableOperatorConstants.MAP_ROW_OP.value,
                                  TableOperatorConstants.MAP_PARTITION_OP.value,
-                                  TableOperatorConstants.APPLY_OP.value
+                                  TableOperatorConstants.APPLY_OP.value,
+                                  TableOperatorConstants.UDF_OP.value]:
                GarbageCollector._delete_object_entry(
                    object_to_delete=self.script_entry,
                    object_type=TeradataConstants.TERADATA_SCRIPT,
teradataml/table_operators/templates/dataframe_udf.template
ADDED

@@ -0,0 +1,63 @@
+import sys, csv
+import datetime
+
+td_buffer = {{}}
+
+
+{FUNCTION_DEFINITION}
+
+function_args = {FUNCTION_ARGS}
+# Information that is required to help with the script usage.
+# The delimiter to use with the input and output text.
+delimiter = "{DELIMITER}"
+# The names of columns in the input teradataml DataFrame.
+_input_columns = {INPUT_COLUMNS}
+# The names of columns in the output teradataml DataFrame.
+_output_columns = {OUTPUT_COLUMNS}
+# The definition for new columns in output.
+columns_definitions = {COLUMNS_DEFINITIONS}
+# The types of columns in the input/output teradataml DataFrame.
+output_type_converters = {OUTPUT_TYPE_CONVERTERS}
+for k,v in output_type_converters.items():
+    if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
+        output_type_converters[k] = 'str'
+output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
+# The quotechar to use.
+quotechar = "{QUOTECHAR}"
+if quotechar == "None":
+    quotechar = None
+
+
+# The entry point to the script.
+if __name__ == "__main__":
+
+    records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
+    for record in records:
+        record = dict(zip(_input_columns, record))
+        out_rec = []
+        for column in _output_columns:
+
+            # If it is a new column, get the value from definition.
+            if column in columns_definitions:
+                f_args = tuple()
+                # Convert the argument types first.
+                for v in function_args[column]:
+                    if v in _input_columns:
+                        c_type_ = output_type_converters.get(v)
+                        if record[v]:
+                            # If it is a float, strip any embedded spaces first.
+                            if c_type_.__name__ == 'float':
+                                arg = output_type_converters.get(v)(record[v].replace(' ', ''))
+                            else:
+                                arg = output_type_converters.get(v)(record[v])
+                        else:
+                            arg = record[v]
+                    else:
+                        arg = v
+                    f_args = f_args + (arg, )
+                func_ = globals()[columns_definitions[column]]
+                out_rec.append(output_type_converters[column](func_(*f_args)))
+            else:
+                out_rec.append(record[column])
+
+        print("{{}}".format(delimiter).join((str(i) for i in out_rec)))
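To make the template's data flow concrete, here is a stand-alone rendering of the same row-processing loop with the `{PLACEHOLDER}` fields filled in by hand; the column names, function, and sample rows are made up for illustration:

    import csv
    import io

    # Hand-filled stand-ins for the template placeholders above.
    def to_upper(s):
        return s.upper()

    delimiter = ","
    _input_columns = ["id", "name"]
    _output_columns = ["id", "upper_name"]
    columns_definitions = {"upper_name": "to_upper"}   # new column -> function name
    function_args = {"upper_name": ("name",)}          # per-column argument spec
    output_type_converters = {"id": str, "name": str, "upper_name": str}

    stdin = io.StringIO("1,alice\n2,bob\n")            # stands in for sys.stdin
    for record in csv.reader(stdin, delimiter=delimiter):
        record = dict(zip(_input_columns, record))
        out_rec = []
        for column in _output_columns:
            if column in columns_definitions:
                # Arguments are input-column values when the name matches,
                # literal values otherwise (simplified from the template).
                f_args = tuple(record.get(v, v) for v in function_args[column])
                func_ = globals()[columns_definitions[column]]
                out_rec.append(output_type_converters[column](func_(*f_args)))
            else:
                out_rec.append(record[column])
        print(delimiter.join(str(i) for i in out_rec))
    # 1,ALICE
    # 2,BOB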
teradataml/telemetry_utils/__init__.py
File without changes
teradataml/telemetry_utils/queryband.py
ADDED

@@ -0,0 +1,52 @@
+from functools import wraps
+from teradatasqlalchemy.telemetry.queryband import QueryBand, collect_queryband as tdsqlalchemy_collect_queryband
+
+
+# Create a global variable to manage querybands for teradataml package.
+global session_queryband
+session_queryband = QueryBand()
+
+
+def collect_queryband(*qb_deco_pos_args, **qb_deco_kwargs):
+    """
+    DESCRIPTION:
+        Decorator for calling collect_queryband decorator in telemetry utility
+        in teradatasqlalchemy using session_queryband object and other positional
+        and keyword arguments expected by collect_queryband.
+
+    PARAMETERS:
+        qb_deco_pos_args:
+            Optional Argument.
+            Specifies the positional arguments accepted by collect_queryband
+            decorator in telemetry utility in teradatasqlalchemy.
+
+        qb_deco_kwargs:
+            Optional Argument.
+            Specifies the keyword arguments accepted by collect_queryband
+            decorator in telemetry utility in teradatasqlalchemy.
+
+    EXAMPLES:
+        >>> from teradataml.telemetry_utils.queryband import collect_queryband
+        # Example 1: Collect queryband for a standalone function.
+        @collect_queryband(queryband="CreateContext")
+        def create_context(host = None, username ...): ...
+
+        # Example 2: Collect queryband for a class method and use a
+        # class attribute to retrieve the queryband string.
+        @collect_queryband(attr="func_name")
+        def _execute_query(self, persist=False, volatile=False): ...
+
+        # Example 3: Collect queryband for a class method and use a
+        # method of the same class to retrieve the queryband string.
+        @collect_queryband(method="get_class_specific_queryband")
+        def _execute_query(self, persist=False, volatile=False): ...
+    """
+    def outer_wrapper(func):
+        @wraps(func)
+        def inner_wrapper(*func_args, **func_kwargs):
+            # Pass the required argument 'session_queryband' along with other
+            # expected arguments to collect_queryband() decorator which is
+            # imported as tdsqlalchemy_collect_queryband.
+            return tdsqlalchemy_collect_queryband(session_queryband, *qb_deco_pos_args, **qb_deco_kwargs)(func)(*func_args, **func_kwargs)
+        return inner_wrapper
+    return outer_wrapper
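queryband.py is an instance of a general pattern: a parameterized decorator that injects one extra, module-level object into another parameterized decorator before delegating to it. A library-free sketch of the pattern, with made-up names standing in for `QueryBand` and `tdsqlalchemy_collect_queryband`:

    from functools import wraps

    shared_state = []                     # plays the role of session_queryband

    def target_deco(state, label):
        # Stands in for tdsqlalchemy_collect_queryband(session_queryband, ...).
        def outer(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                state.append(label)       # record the queryband-like tag
                return func(*args, **kwargs)
            return wrapper
        return outer

    def forwarding_deco(*deco_args, **deco_kwargs):
        def outer_wrapper(func):
            @wraps(func)
            def inner_wrapper(*args, **kwargs):
                # Inject the shared object, then delegate to the target decorator.
                return target_deco(shared_state, *deco_args, **deco_kwargs)(func)(*args, **kwargs)
            return inner_wrapper
        return outer_wrapper

    @forwarding_deco("CreateContext")
    def create_context():
        return "ok"

    create_context()
    print(shared_state)                   # ['CreateContext']

Rebuilding the target decorator on every call, as both the sketch and queryband.py do, keeps the forwarding layer stateless; the shared object carries all session state.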
teradataml/utils/validators.py
CHANGED
@@ -170,7 +170,7 @@ class _Validators:
                Required Argument.
                Specifies the name or list of names of columns to be validated
                for existence.
-                Types: str or List of strings
+                Types: str or List of strings or ColumnExpression or list of ColumnExpression

            arg_name:
                Required Argument.
@@ -204,7 +204,15 @@ class _Validators:
        df_columns = UtilFuncs._all_df_columns(column_expression)

        # Let's validate existence of each column one by one.
-
+        columns_ = []
+        for column in columns:
+            if isinstance(column, str):
+                columns_.append(column)
+            else:
+                columns_ = columns_ + UtilFuncs._all_df_columns(column)
+
+        # Let's validate existence of each column one by one.
+        for column_name in columns_:
            # If column name does not exist in DataFrame of a column, raise the exception.
            if column_name not in df_columns:
                message = "{}. Check the argument '{}'".format(sorted(df_columns), arg_name)
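The new normalization step accepts a mix of plain strings and column-expression objects and flattens everything to column names before the existence check. In isolation, with a stand-in class playing the role of teradataml's ColumnExpression:

    class FakeColumnExpression:
        # Stand-in: a real ColumnExpression resolves to one or more column
        # names via UtilFuncs._all_df_columns().
        def __init__(self, *names):
            self.names = list(names)

    def _all_df_columns(expr):
        return expr.names

    def flatten_columns(columns):
        columns_ = []
        for column in columns:
            if isinstance(column, str):
                columns_.append(column)
            else:
                columns_ = columns_ + _all_df_columns(column)
        return columns_

    print(flatten_columns(["a", FakeColumnExpression("b", "c"), "d"]))
    # ['a', 'b', 'c', 'd']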
@@ -1652,7 +1660,7 @@ class _Validators:

        # Check whether table exists on the system or not.
        table_exists = conn.dialect.has_table(conn, table_name=table_name,
-                                              schema=schema_name)
+                                              schema=schema_name, table_only=True)

        # If table exists, return True.
        if table_exists:
@@ -2237,3 +2245,33 @@ class _Validators:
            raise TeradataMlException(message,
                                      MessageCodes.IMPORT_PYTHON_PACKAGE)
        return True
+
+
+    @staticmethod
+    @skip_validation()
+    def _validate_ipaddress(ip_address):
+        """
+        DESCRIPTION:
+            Check if ipaddress is valid.
+        PARAMETERS:
+            ip_address:
+                Required Argument.
+                Specifies the ip address to be validated.
+                Types: str
+        RETURNS:
+            None.
+        RAISES:
+            TeradataMlException
+        EXAMPLES:
+            _Validators._validate_ipaddress("190.132.12.15")
+        """
+        import ipaddress
+
+        try:
+            ipaddress.ip_address(ip_address)
+        except Exception as err:
+            raise ValueError(Messages.get_message(
+                MessageCodes.INVALID_ARG_VALUE).format(ip_address, "ip_address",
+                'of four numbers (each between 0 and 255) separated by periods'))
+
+        return True
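The validator delegates the actual parsing to the standard library's `ipaddress.ip_address()`, which raises `ValueError` for malformed input. A quick demonstration (note that the stdlib function also accepts IPv6 strings, even though the error message above describes only the dotted-quad IPv4 form):

    import ipaddress

    for candidate in ["190.132.12.15", "::1", "999.1.1.1", "not-an-ip"]:
        try:
            ipaddress.ip_address(candidate)
            print(candidate, "-> valid")
        except ValueError:
            print(candidate, "-> invalid")
    # 190.132.12.15 -> valid
    # ::1 -> valid
    # 999.1.1.1 -> invalid
    # not-an-ip -> invalid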