teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/table_operators/Script.py

@@ -431,8 +431,8 @@ class Script(TableOperator):
             from teradataml import list_td_reserved_keywords
             if get_connection():
                 # Checking for reserved keywords and raising error if present.
-
-
+                columns = self.returns
+                list_td_reserved_keywords(key=columns, raise_error=True)

     def __validate(self):
         """

teradataml/table_operators/TableOperator.py

@@ -16,7 +16,7 @@ import os
 import time
 import uuid
 from math import floor
-import
+import warnings
 import subprocess
 from pathlib import Path
 import teradataml.dataframe as tdmldf

@@ -1012,18 +1012,24 @@ class TableOperator:
         repr_string = "{}\n\n{}".format(repr_string, self.result)
         return repr_string

-    def deploy(self, model_column, partition_columns=None, model_file_prefix=None
+    def deploy(self, model_column, partition_columns=None, model_file_prefix=None, retry=3,
+               retry_timeout=30):
         """
         DESCRIPTION:
-            Function deploys the
+            Function deploys the models generated after running `execute_script()` in database in
             VantageCloud Enterprise or in user environment in VantageCloud Lake.
             If deployed files are not needed, these files can be removed using `remove_file()` in
-            database or
+            database or `UserEnv.remove_file()` in lake.
+
+            Note:
+                If the models (one or many) fail to get deployed in Vantage even after retries,
+                try deploying them again using `install_file()` function or remove installed
+                files using `remove_file()` function.

         PARAMETERS:
             model_column:
                 Required Argument.
-                Specifies the column name in which
+                Specifies the column name in which models are present.
                 Supported types of model in this column are CLOB and BLOB.
                 Note:
                     The column mentioned in this argument should be present in

@@ -1051,11 +1057,27 @@ class TableOperator:
                 with underscore(_) to generate model file names.
                 Types: str

+            retry:
+                Optional Argument.
+                Specifies the maximum number of retries to be made to deploy the models.
+                This argument helps in retrying the deployment of models in case of network issues.
+                This argument should be a positive integer.
+                Default Value: 3
+                Types: int
+
+            retry_timeout:
+                Optional Argument. Used along with retry argument. Ignored otherwise.
+                Specifies the time interval in seconds between each retry.
+                This argument should be a positive integer.
+                Default Value: 30
+                Types: int
+
         RETURNS:
             List of generated file identifiers in database or file names in lake.

         RAISES:
-            TeradatamlException
+            - TeradatamlException
+            - Throws warning when models failed to deploy even after retries.

         EXAMPLES:
             >>> import teradataml

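Taken together, the two new arguments bound how long deploy() can spend retrying. A quick worked example of that bound (plain arithmetic, not teradataml code):

```python
# With the documented defaults retry=3 and retry_timeout=30, a file that keeps
# failing is attempted once up front and then in 3 retry rounds, each preceded
# by a 30-second sleep, before the RuntimeWarning is raised.
retry, retry_timeout = 3, 30
total_attempts = 1 + retry               # initial pass + retry rounds
worst_case_sleep = retry * retry_timeout # seconds spent sleeping between rounds
print(total_attempts, worst_case_sleep)  # 4 90
```
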
@@ -1139,7 +1161,24 @@ class TableOperator:
             'my_prefix_new__0_11',
             'my_prefix_new__1_10',
             'my_prefix_new__1_11']
-
+
+            # Example 5: Assuming that 2 model files fail to get installed due to network issues,
+            #            the function retries installing the failed files twice with timeout between
+            #            retries of 10 secs.
+            >>> opt = obj.deploy(model_column="model", model_file_prefix="my_prefix_",
+                                 partition_columns=["partition_column_1", "partition_column_2"],
+                                 retry=2, retry_timeout=10)
+            RuntimeWarning: The following model files failed to get installed in Vantage:
+            ['my_prefix__1_10', 'my_prefix__1_11'].
+            Try manually deploying them from the path '<temp_path>' using:
+             - `install_file()` when connected to Enterprise/On-Prem system or
+             - `UserEnv.install_file()` when connected to Lake system.
+            OR
+            Remove the returned installed files manually using `remove_file()` or `UserEnv.remove_file()`.
+            >>> opt
+            ['my_prefix__0_10',
+             'my_prefix__0_11']
+
             ## Run in VantageCloud Lake using Apply object.
             # Let's assume an user environment named "user_env" already exists in VantageCloud Lake,
             # which will be used for the examples below.

@@ -1168,7 +1207,7 @@ class TableOperator:
             1          10    b'abdwcd.....dWIz'
             1          11    b'gA4jc4.....agfu'

-            # Example
+            # Example 6: Provide both "partition_columns" and "model_file_prefix" arguments.
             >>> obj.deploy(model_column="model", model_file_prefix="my_prefix_",
                            partition_columns=["partition_column_1", "partition_column_2"])
             ['my_prefix__0_10',

@@ -1183,8 +1222,13 @@ class TableOperator:
         arg_info_matrix.append(["model_column", model_column, False, (str)])
         arg_info_matrix.append(["partition_columns", partition_columns, True, (str, list)])
         arg_info_matrix.append(["model_file_prefix", model_file_prefix, True, (str)])
+        arg_info_matrix.append(["retry", retry, True, (int)])
+        arg_info_matrix.append(["retry_timeout", retry_timeout, True, (int)])
         _Validators._validate_function_arguments(arg_info_matrix)

+        _Validators._validate_positive_int(retry, "retry", lbound_inclusive=True)
+        _Validators._validate_positive_int(retry_timeout, "retry_timeout", lbound_inclusive=True)
+
         if self.result is None:
             return "Result is empty. Please run execute_script first."

|
|
|
1212
1256
|
model_column_type = data._td_column_names_and_sqlalchemy_types[model_column.lower()].__class__.__name__
|
|
1213
1257
|
|
|
1214
1258
|
n_models = len(vals)
|
|
1215
|
-
all_files = []
|
|
1216
1259
|
|
|
1217
1260
|
# Default location for .teradataml is user's home directory if configure.local_storage is not set.
|
|
1218
1261
|
tempdir = GarbageCollector._get_temp_dir_name()
|
|
1219
1262
|
|
|
1263
|
+
def __install_file(model_file, model_file_path):
|
|
1264
|
+
"""
|
|
1265
|
+
Function to install the model file in Vantage and return the status.
|
|
1266
|
+
"""
|
|
1267
|
+
file_installed = True
|
|
1268
|
+
try:
|
|
1269
|
+
if self.__class__.__name__ == "Script":
|
|
1270
|
+
from teradataml.dbutils.filemgr import install_file
|
|
1271
|
+
install_file(file_identifier=model_file, file_path=model_file_path,
|
|
1272
|
+
is_binary=True, suppress_output=True, replace=True)
|
|
1273
|
+
elif self.__class__.__name__ == "Apply":
|
|
1274
|
+
self.env.install_file(file_path=model_file_path, suppress_output=True, replace=True)
|
|
1275
|
+
except Exception as e:
|
|
1276
|
+
file_installed = False
|
|
1277
|
+
return file_installed
|
|
1278
|
+
|
|
1279
|
+
installed_files = []
|
|
1280
|
+
failed_files = []
|
|
1281
|
+
|
|
1220
1282
|
for i, row in enumerate(vals):
|
|
1221
1283
|
model = row[0]
|
|
1222
1284
|
partition_values = ""
|
|
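The `__install_file` closure dispatches on the concrete class name so the same deploy path serves both Script (database install) and Apply (Lake user environment). A self-contained sketch of that shape, with stand-in classes:

```python
# Stand-in classes: the real code branches on self.__class__.__name__ and calls
# teradataml's install_file() or UserEnv.install_file() respectively.
class Script:
    def _install(self, path):
        return "install_file({!r}, is_binary=True)".format(path)  # database-side

class Apply:
    def _install(self, path):
        return "env.install_file({!r})".format(path)              # Lake user-env

def install_model(obj, path):
    # Mirror __install_file's contract: swallow errors, report success/failure.
    try:
        obj._install(path)
        return True
    except Exception:
        return False

print(install_model(Script(), "/tmp/model_0"))  # True
print(install_model(Apply(), "/tmp/model_0"))   # True
```
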
@@ -1241,15 +1303,39 @@ class TableOperator:
             with open(model_file_path, "wb") as f:
                 f.write(model)

-
-                from teradataml import install_file
-                install_file(file_identifier=model_file, file_path=model_file_path,
-                             is_binary=True, suppress_output=True)
-            elif self.__class__.__name__ == "Apply":
-                self.env.install_file(file_path=model_file_path)
-
-            all_files.append(model_file)
+            file_installed = __install_file(model_file, model_file_path)

-
-
-
+            if file_installed:
+                installed_files.append(model_file)
+                os.remove(model_file_path)
+            else:
+                # File failed to get installed in Vantage. Hence, keeping the file in tempdir.
+                failed_files.append(model_file)
+
+        while retry and failed_files:
+            # If there are any failed files and retry is not zero, retry installing the failed files.
+            time.sleep(retry_timeout)
+            retry_failed_files = []
+            for model_file in failed_files:
+                model_file_path = os.path.join(tempdir, model_file)
+                file_installed = __install_file(model_file, model_file_path)
+
+                if file_installed:
+                    installed_files.append(model_file)
+                    os.remove(model_file_path)
+                else:
+                    # File failed to get installed in Vantage. Hence, keeping the file in tempdir.
+                    retry_failed_files.append(model_file)
+            failed_files = retry_failed_files
+            retry -= 1
+
+        if failed_files:
+            failed_files.sort()
+            warning_message = "The following model files failed to get installed in Vantage:\n" + str(failed_files) + ".\n"
+            warning_message += "Try manually deploying them from the path '" + tempdir + "' using:\n"
+            warning_message += " - `install_file()` when connected to Enterprise/On-Prem system or\n"
+            warning_message += " - `UserEnv.install_file()` when connected to Lake system.\n"
+            warning_message += "OR\nRemove the returned installed files manually using `remove_file()` or `UserEnv.remove_file()`."
+            warnings.warn(RuntimeWarning(warning_message))
+
+        return installed_files

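Read in isolation, the retry flow added above reduces to a small reusable pattern. A standalone sketch (function and variable names are illustrative, not teradataml's API):

```python
import time
import warnings

def install_with_retry(files, install_one, retry=3, retry_timeout=30):
    """Attempt install_one(f) for every file, then re-attempt only the
    failures in up to `retry` rounds, sleeping retry_timeout between rounds."""
    installed, failed = [], []
    for f in files:                      # first pass
        if install_one(f):
            installed.append(f)
        else:
            failed.append(f)
    while retry and failed:              # bounded retry rounds
        time.sleep(retry_timeout)
        still_failed = []
        for f in failed:
            if install_one(f):
                installed.append(f)
            else:
                still_failed.append(f)
        failed = still_failed
        retry -= 1
    if failed:                           # report what never made it
        warnings.warn(RuntimeWarning("failed to install: {}".format(sorted(failed))))
    return installed

# Example: "m2" fails on the first pass and succeeds on the first retry.
calls = {"m1": 0, "m2": 0}
def flaky_install(f):
    calls[f] += 1
    return f != "m2" or calls[f] > 1

print(install_with_retry(["m1", "m2"], flaky_install, retry=2, retry_timeout=0))
# ['m1', 'm2']
```
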
teradataml/table_operators/query_generator.py

@@ -481,6 +481,9 @@ class QueryGenerator:
             return configure.read_nos_function_mapping.upper()
         elif "WriteNOS".lower() == function_name.lower():
             return configure.write_nos_function_mapping.upper()
+        # If Table Operator function is IMAGE2MATRIX, then return alias name as TD_IMAGE2MATRIX.
+        elif "IMAGE2MATRIX".lower() == function_name.lower():
+            return "TD_IMAGE2MATRIX"

         engine_name = UtilFuncs._get_engine_name(self._engine)

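The alias lookup this hunk extends is a simple name-mapping pattern. A dict-based sketch (the real code resolves the NOS names via `configure.read_nos_function_mapping` / `write_nos_function_mapping`; the values below are placeholders):

```python
def resolve_alias(function_name):
    # Placeholder mapping; only IMAGE2MATRIX -> TD_IMAGE2MATRIX is confirmed
    # by the diff above, the NOS entries stand in for configurable values.
    special = {
        "readnos": "READ_NOS",
        "writenos": "WRITE_NOS",
        "image2matrix": "TD_IMAGE2MATRIX",
    }
    return special.get(function_name.lower(), function_name.upper())

print(resolve_alias("IMAGE2MATRIX"))  # TD_IMAGE2MATRIX
```
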
teradataml/table_operators/table_operator_query_generator.py

@@ -231,7 +231,9 @@ class TableOperatorQueryGenerator(QueryGenerator):
         using_clause = ""
         # If the function is a NOS function, then USING clause is needed.
         if self._function_name.lower() in [configure.write_nos_function_mapping.lower(),
-                                           configure.read_nos_function_mapping.lower()
+                                           configure.read_nos_function_mapping.lower(),
+                                           "td_image2matrix"
+                                           ]:
             using_clause = "USING"
         invocation_sql = "{0}\n\t{1}{2}".format(invocation_sql, using_clause, self.__OTHER_ARG_CLAUSE)

teradataml/table_operators/table_operator_util.py

@@ -9,8 +9,7 @@
 # Description: Utilities for Table Operators.                     #
 #                                                                 #
 # ##################################################################
-
-import os
+import os, json
 import teradataml.dataframe as tdmldf
 from teradataml.common.constants import TableOperatorConstants, \
     TeradataConstants, OutputStyle

@@ -24,7 +23,8 @@ from teradataml.scriptmgmt.lls_utils import get_env
 from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
 from functools import partial
-from inspect import isfunction
+from inspect import isfunction, getsource
+from pathlib import Path


 class _TableOperatorUtils:

@@ -281,12 +281,20 @@ class _TableOperatorUtils:
             self.__validate()
         """
         # Validate the user defined function.
-
-
-
-
-
-
+
+        if self.operation in [TableOperatorConstants.UDF_OP.value,\
+                              TableOperatorConstants.REGISTER_OP.value]:
+            for udf_function in self.user_function:
+                if not isfunction(udf_function):
+                    raise TypeError(Messages.get_message(
+                        MessageCodes.UNSUPPORTED_DATATYPE, 'user_function', "'function'"))
+        else:
+            if not (isfunction(self.user_function) or
+                    isinstance(self.user_function, partial)):
+                raise TypeError(Messages.get_message(
+                    MessageCodes.UNSUPPORTED_DATATYPE, 'user_function',
+                    "'function' or 'functools.partial'")
+                )

         if arg_info_matrix is None:
             arg_info_matrix = []

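A condensed sketch of the type gate added here: UDF/register operations expect an iterable of plain functions, while the other operations also accept functools.partial (a hedged reconstruction of the branching, not the exact teradataml messages):

```python
from functools import partial
from inspect import isfunction

def check_user_function(user_function, allow_partial):
    # Plain functions always pass; functools.partial only where allowed.
    if isfunction(user_function):
        return
    if allow_partial and isinstance(user_function, partial):
        return
    raise TypeError("user_function must be a function"
                    + (" or functools.partial" if allow_partial else ""))

def double(x):
    return 2 * x

check_user_function(double, allow_partial=False)          # OK
check_user_function(partial(double), allow_partial=True)  # OK
```
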
@@ -324,20 +332,30 @@ class _TableOperatorUtils:
         EXAMPLES:
             self.__create_user_script()
         """
-        #
-        #
-        # It has the format "<
-
-
-
-
-
-
-
-
-
-
-
+        # If operation is register, then generate script name based on the
+        # user function name and return type.
+        # It has the format "tdml_udf_name_<registered_name>_udf_type_<return_type>_register.py"
+        if self.operation == TableOperatorConstants.REGISTER_OP.value:
+            registered_name = list(self.returns.keys())[0]
+            return_type = self.returns[registered_name]
+            self.script_name = "tdml_udf_name_{}_udf_type_{}_register.py".format(registered_name, return_type)
+            self.script_base_name = Path(self.script_name).stem
+        else:
+            # Generate script name and alias, and add entry to a Garbage Collector.
+            # script_entry is the string that is added to Garbage collector.
+            # It has the format "<databasename>"."<file_id>".
+            self.script_entry, self.script_alias, self.script_name, self.script_base_name = self.__get_script_name()
+
+        if self.operation not in [TableOperatorConstants.UDF_OP.value, TableOperatorConstants.REGISTER_OP.value]:
+            # Get the converters to use with pandas.read_csv, and to correctly
+            # typecast the numeric data.
+            python_input_col_types = [UtilFuncs._teradata_type_to_python_type(col.type)
+                                      for col in self.data._metaexpr.c]
+            input_converters = UtilFuncs._get_pandas_converters(python_input_col_types)
+
+            python_output_col_types = [UtilFuncs._teradata_type_to_python_type(type_)
+                                       for type_ in list(self.returns.values())]
+            output_converters = UtilFuncs._get_pandas_converters(python_output_col_types)

         # Create script in .teradataml directory.
         script_dir = GarbageCollector._get_temp_dir_name()

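The register-mode naming convention spelled out in the new comments can be reproduced directly; a toy illustration with an assumed `returns` mapping of registered name to return type:

```python
from pathlib import Path

returns = {"to_upper": "VARCHAR"}  # assumed shape: registered name -> return type
registered_name = list(returns.keys())[0]
return_type = returns[registered_name]
script_name = "tdml_udf_name_{}_udf_type_{}_register.py".format(registered_name, return_type)
print(script_name)             # tdml_udf_name_to_upper_udf_type_VARCHAR_register.py
print(Path(script_name).stem)  # tdml_udf_name_to_upper_udf_type_VARCHAR_register
```
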
@@ -349,37 +367,61 @@ class _TableOperatorUtils:
                                     os.path.dirname(os.path.abspath(__file__))),
                                     "table_operators",
                                     "templates")
-
-
-
+        # Get the template.
+        template = {TableOperatorConstants.APPLY_OP.value: TableOperatorConstants.APPLY_TEMPLATE.value,
+                    TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value,
+                    TableOperatorConstants.REGISTER_OP.value: TableOperatorConstants.REGISTER_TEMPLATE.value }
+        template_name = template.get(self.operation, TableOperatorConstants.MAP_TEMPLATE.value)
         # Write to the script based on the template.
         try:
             with open(os.path.join(template_dir, template_name), 'r') as input_file:
                 with open(self.script_path, 'w') as output_file:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    if self.operation == TableOperatorConstants.UDF_OP.value:
+
+                        user_function_code = UtilFuncs._func_to_string(self.user_function)
+                        output_file.write(input_file.read().format(
+                            DELIMITER=self.delimiter,
+                            QUOTECHAR=self.quotechar,
+                            FUNCTION_DEFINITION=user_function_code,
+                            FUNCTION_ARGS =str(self.function_args),
+                            INPUT_COLUMNS=json.dumps(self.data.columns),
+                            OUTPUT_COLUMNS=json.dumps(list(self.returns.keys())),
+                            COLUMNS_DEFINITIONS=json.dumps(self.columns_definitions),
+                            OUTPUT_TYPE_CONVERTERS=json.dumps(self.output_type_converters)
+                        ))
+                    elif self.operation == TableOperatorConstants.REGISTER_OP.value:
+                        # Get the source code of the user function.
+                        user_function_code = UtilFuncs._func_to_string(self.user_function)
+                        output_file.write(input_file.read().format(
+                            FUNCTION_DEFINITION=user_function_code,
+                            FUNCTION_NAME = self.user_function[0].__name__
+                        ))
+                    else:
+                        # prepare script file from template file for maprow and mappartition.
+                        output_file.write(
+                            input_file.read().format(
+                                DELIMITER=UtilFuncs._serialize_and_encode(
+                                    self.delimiter),
+                                STO_OPERATION=UtilFuncs._serialize_and_encode(
+                                    self.operation),
+                                USER_DEF_FUNC=UtilFuncs._serialize_and_encode(
+                                    self.user_function),
+                                DF_COL_NAMES_LIST=UtilFuncs._serialize_and_encode(
+                                    self.data.columns),
+                                DF_COL_TYPES_LIST=UtilFuncs._serialize_and_encode(
+                                    python_input_col_types),
+                                OUTPUT_COL_NAMES_LIST=UtilFuncs._serialize_and_encode(
+                                    list(self.returns.keys())),
+                                OUTPUT_CONVERTERS=UtilFuncs._serialize_and_encode(
+                                    output_converters),
+                                QUOTECHAR=UtilFuncs._serialize_and_encode(
+                                    self.quotechar),
+                                INPUT_CONVERTERS=UtilFuncs._serialize_and_encode(
+                                    input_converters),
+                                CHUNK_SIZE=UtilFuncs._serialize_and_encode(
+                                    self.chunk_size)
+                            )
                         )
-                        )
         except Exception:
             # We may end up here if the formatting of the templating to create
             # the user script fails.

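The templates filled here rely on str.format(): literal braces in the template file are doubled (`{{}}`) so that only the named placeholders are substituted. A minimal demonstration:

```python
# Two-line stand-in for a .template file: `{{}}` survives formatting as `{}`,
# while {FUNCTION_DEFINITION} and {DELIMITER} are replaced with real values.
template = 'td_buffer = {{}}\n{FUNCTION_DEFINITION}\ndelimiter = "{DELIMITER}"\n'
script = template.format(
    FUNCTION_DEFINITION="def double(x): return 2 * x",
    DELIMITER=",",
)
print(script)
# td_buffer = {}
# def double(x): return 2 * x
# delimiter = ","
```
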
@@ -410,9 +452,11 @@ class _TableOperatorUtils:
         """
         try:
             if self.operation in [TableOperatorConstants.MAP_ROW_OP.value,
-                                  TableOperatorConstants.MAP_PARTITION_OP.value]
+                                  TableOperatorConstants.MAP_PARTITION_OP.value] or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'IN-DB'):
                 return self.__execute_script_table_operator()
-            elif self.operation == TableOperatorConstants.APPLY_OP.value
+            elif self.operation == TableOperatorConstants.APPLY_OP.value or \
+                    (self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'REMOTE'):
                 return self.__execute_apply_table_operator()
         except Exception:
             raise

@@ -450,7 +494,6 @@ class _TableOperatorUtils:
         script_name = script_alias  # alias now contains extension also.

         # Extract the base name without extension.
-        from pathlib import Path
         script_base_name = Path(script_alias).stem
         return script_entry, script_alias, script_name, script_base_name

@@ -572,8 +615,9 @@ class _TableOperatorUtils:
         if self.exec_mode.upper() == TableOperatorConstants.REMOTE_EXEC.value:
             # If not test mode, execute the script using Apply table operator.
             try:
-                # If APPLY, get environment and use it for installing file.
-                if self.operation
+                # If APPLY or UDF, get environment and use it for installing file.
+                if self.operation in [TableOperatorConstants.APPLY_OP.value,
+                                      TableOperatorConstants.UDF_OP.value]:
                     self.__env.install_file(self.script_path, suppress_output=True)

                 # Execute the script.

|
|
|
617
661
|
suppress_output=True)
|
|
618
662
|
|
|
619
663
|
# For apply, remove file from remote user environment.
|
|
620
|
-
if self.operation == TableOperatorConstants.APPLY_OP.value
|
|
664
|
+
if self.operation == TableOperatorConstants.APPLY_OP.value or \
|
|
665
|
+
(self.operation == TableOperatorConstants.UDF_OP.value and self.exec_mode == 'REMOTE'):
|
|
621
666
|
self.__env.remove_file(self.script_name, suppress_output=True)
|
|
622
667
|
|
|
623
668
|
# Remove the entry from Garbage Collector
|
|
624
669
|
if self.operation in [TableOperatorConstants.MAP_ROW_OP.value,
|
|
625
670
|
TableOperatorConstants.MAP_PARTITION_OP.value,
|
|
626
|
-
TableOperatorConstants.APPLY_OP.value
|
|
671
|
+
TableOperatorConstants.APPLY_OP.value,
|
|
672
|
+
TableOperatorConstants.UDF_OP.value]:
|
|
627
673
|
GarbageCollector._delete_object_entry(
|
|
628
674
|
object_to_delete=self.script_entry,
|
|
629
675
|
object_type=TeradataConstants.TERADATA_SCRIPT,
|
|
teradataml/table_operators/templates/dataframe_register.template

@@ -0,0 +1,69 @@
+import json
+import sys, csv
+import datetime
+import urllib.parse
+
+td_buffer = {{}}
+
+
+{FUNCTION_DEFINITION}
+
+# Decode the URL encoded string and store it back as dictionary.
+dec = urllib.parse.unquote_plus(sys.argv[1])
+script_data = json.loads(dec)
+
+# Information that is required to help with the script usage.
+# The delimiter to use with the input and output text.
+delimiter = script_data["delimiter"]
+# The quotechar to use.
+quotechar = script_data["qoutechar"]
+# The names of columns in the input teradataml DataFrame.
+_input_columns = script_data["input_cols"]
+# The names of columns in the output teradataml DataFrame.
+_output_columns = script_data["output_cols"]
+# The types of columns in the input/output teradataml DataFrame.
+# The mapper of output column name to function arguments
+function_args = script_data["function_args"]
+# The definition for new columns in output.
+columns_definitions = {{_output_columns[-1]: "{FUNCTION_NAME}"}}
+output_type_converters = script_data["output_type_converters"]
+for k,v in output_type_converters.items():
+    if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
+        output_type_converters[k] = 'str'
+output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
+
+
+
+# The entry point to the script.
+if __name__ == "__main__":
+
+    records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
+    for record in records:
+        record = dict(zip(_input_columns, record))
+        out_rec = []
+        for column in _output_columns:
+
+            # If it is a new column, get the value from definition.
+            if column in columns_definitions:
+                f_args = tuple()
+                # Convert the argument types first.
+                for v in function_args[column]:
+                    if v in _input_columns:
+                        c_type_ = output_type_converters.get(v)
+                        if record[v]:
+                            # If it is a float, replace the empty character.
+                            if c_type_.__name__ == 'float':
+                                arg = output_type_converters.get(v)(record[v].replace(' ', ''))
+                            else:
+                                arg = output_type_converters.get(v)(record[v])
+                        else:
+                            arg = record[v]
+                    else:
+                        arg = v
+                    f_args = f_args + (arg, )
+                func_ = globals()[columns_definitions[column]]
+                out_rec.append(output_type_converters[column](func_(*f_args)))
+            else:
+                out_rec.append(record[column])
+
+        print("{{}}".format(delimiter).join((str(i) for i in out_rec)))

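At runtime the generated script reads delimited rows from stdin and writes delimited rows to stdout. A local simulation of that inner loop (using io.StringIO in place of sys.stdin, with made-up columns and a toy transformation):

```python
import csv
import io

_input_columns = ["id", "name"]

# Simulated stdin: two comma-delimited records, as the database would stream them.
records = csv.reader(io.StringIO("1,alice\n2,bob\n"), delimiter=",")
for record in records:
    row = dict(zip(_input_columns, record))
    # A toy "UDF" column appended to each row, mimicking columns_definitions.
    print(",".join([row["id"], row["name"], row["name"].upper()]))
# 1,alice,ALICE
# 2,bob,BOB
```
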
teradataml/table_operators/templates/dataframe_udf.template

@@ -0,0 +1,63 @@
+import sys, csv
+import datetime
+
+td_buffer = {{}}
+
+
+{FUNCTION_DEFINITION}
+
+function_args = {FUNCTION_ARGS}
+# Information that is required to help with the script usage.
+# The delimiter to use with the input and output text.
+delimiter = "{DELIMITER}"
+# The names of columns in the input teradataml DataFrame.
+_input_columns = {INPUT_COLUMNS}
+# The names of columns in the output teradataml DataFrame.
+_output_columns = {OUTPUT_COLUMNS}
+# The definition for new columns in output.
+columns_definitions = {COLUMNS_DEFINITIONS}
+# The types of columns in the input/output teradataml DataFrame.
+output_type_converters = {OUTPUT_TYPE_CONVERTERS}
+for k,v in output_type_converters.items():
+    if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
+        output_type_converters[k] = 'str'
+output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
+# The quotechar to use.
+quotechar = "{QUOTECHAR}"
+if quotechar == "None":
+    quotechar = None
+
+
+# The entry point to the script.
+if __name__ == "__main__":
+
+    records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
+    for record in records:
+        record = dict(zip(_input_columns, record))
+        out_rec = []
+        for column in _output_columns:
+
+            # If it is a new column, get the value from definition.
+            if column in columns_definitions:
+                f_args = tuple()
+                # Convert the argument types first.
+                for v in function_args[column]:
+                    if v in _input_columns:
+                        c_type_ = output_type_converters.get(v)
+                        if record[v]:
+                            # If it is a float, replace the empty character.
+                            if c_type_.__name__ == 'float':
+                                arg = output_type_converters.get(v)(record[v].replace(' ', ''))
+                            else:
+                                arg = output_type_converters.get(v)(record[v])
+                        else:
+                            arg = record[v]
+                    else:
+                        arg = v
+                    f_args = f_args + (arg, )
+                func_ = globals()[columns_definitions[column]]
+                out_rec.append(output_type_converters[column](func_(*f_args)))
+            else:
+                out_rec.append(record[column])
+
+        print("{{}}".format(delimiter).join((str(i) for i in out_rec)))

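Both templates share the converter-resolution step: type names arrive as strings, datetime types fall back to str, and the rest are resolved to builtins. A standalone sketch (using the `builtins` module explicitly, since `__builtins__` behaves differently depending on import context):

```python
import builtins

output_type_converters = {"id": "int", "score": "float", "ts": "datetime.datetime"}
for k, v in output_type_converters.items():
    # Replacing values (not keys) while iterating is safe in Python.
    if v in ("datetime.date", "datetime.time", "datetime.datetime"):
        output_type_converters[k] = "str"
output_type_converters = {k: getattr(builtins, v) for k, v in output_type_converters.items()}

print(output_type_converters["score"]("3.14"))     # 3.14
print(output_type_converters["ts"]("2024-01-01"))  # 2024-01-01 (kept as str)
```
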
teradataml/telemetry_utils/__init__.py: File without changes

teradataml/telemetry_utils/queryband.py

@@ -0,0 +1,52 @@
+from functools import wraps
+from teradatasqlalchemy.telemetry.queryband import QueryBand, collect_queryband as tdsqlalchemy_collect_queryband
+
+
+# Create a global variable to manage querybands for teradataml package.
+global session_queryband
+session_queryband = QueryBand()
+
+
+def collect_queryband(*qb_deco_pos_args, **qb_deco_kwargs):
+    """
+    DESCRIPTION:
+        Decorator for calling collect_queryband decorator in telemetry utility
+        in teradatasqlalchemy using session_queryband object and other positional
+        and keyword arguments expected by collect_queryband.
+
+    PARAMETERS:
+        qb_deco_pos_args:
+            Optional Argument.
+            Specifies the positional arguments accepted by collect_queryband
+            decorator in telemetry utility in teradatasqlalchemy.
+
+        qb_deco_kwargs:
+            Optional Argument.
+            Specifies the keyword arguments accepted by collect_queryband
+            decorator in telemetry utility in teradatasqlalchemy.
+
+    EXAMPLES:
+        >>> from teradataml.telemetry_utils.queryband import collect_queryband
+        # Example 1: Collect queryband for a standalone function.
+        @collect_queryband(queryband="CreateContext")
+        def create_context(host = None, username ...): ...
+
+        # Example 2: Collect queryband for a class method and use
+        #            class attribute to retrive queryband string.
+        @collect_queryband(attr="func_name")
+        def _execute_query(self, persist=False, volatile=False):...
+
+        # Example 3: Collect queryband for a class method and use
+        #            method of same class to retrive queryband string.
+        @collect_queryband(method="get_class_specific_queryband")
+        def _execute_query(self, persist=False, volatile=False):...
+    """
+    def outer_wrapper(func):
+        @wraps(func)
+        def inner_wrapper(*func_args, **func_kwargs):
+            # Pass the required argument 'session_queryband' along with other
+            # expected arguments to collect_queryband() decorator which is
+            # imported as tdsqlalchemy_collect_queryband.
+            return tdsqlalchemy_collect_queryband(session_queryband, *qb_deco_pos_args, **qb_deco_kwargs)(func)(*func_args, **func_kwargs)
+        return inner_wrapper
+    return outer_wrapper