teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +112 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +224 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +6 -4
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +245 -306
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +313 -82
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +109 -146
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/constants.py +37 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +151 -120
- teradataml/common/messagecodes.py +4 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +97 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +72 -2
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
- teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +474 -41
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +658 -20
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +322 -16
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
- teradataml/options/__init__.py +3 -1
- teradataml/options/configure.py +14 -2
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +10 -6
- teradataml/scriptmgmt/lls_utils.py +3 -2
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +1 -1
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
# Python libraries
|
|
17
17
|
import time
|
|
18
|
+
import ast
|
|
18
19
|
|
|
19
20
|
# Teradata libraries
|
|
20
21
|
from teradataml.dataframe.dataframe import DataFrame
|
|
@@ -56,7 +57,12 @@ class _ModelEvaluator:
|
|
|
56
57
|
self.target_column = target_column
|
|
57
58
|
self.task_type = task_type
|
|
58
59
|
|
|
59
|
-
def model_evaluation(self,
|
|
60
|
+
def model_evaluation(self,
|
|
61
|
+
rank,
|
|
62
|
+
table_name_mapping,
|
|
63
|
+
data_node_id,
|
|
64
|
+
target_column_ind = True,
|
|
65
|
+
get_metrics = False):
|
|
60
66
|
"""
|
|
61
67
|
DESCRIPTION:
|
|
62
68
|
Function performs the model evaluation on the specified rank in leaderboard.
|
|
@@ -72,25 +78,32 @@ class _ModelEvaluator:
|
|
|
72
78
|
Specifies the mapping of train,test table names.
|
|
73
79
|
Types: dict
|
|
74
80
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
Specifies
|
|
78
|
-
|
|
79
|
-
Types: bool
|
|
81
|
+
data_node_id:
|
|
82
|
+
Required Argument.
|
|
83
|
+
Specifies the test data node id.
|
|
84
|
+
Types: str
|
|
80
85
|
|
|
81
86
|
target_column_ind:
|
|
82
87
|
Optional Argument.
|
|
83
88
|
Specifies whether target column is present in the dataset or not.
|
|
89
|
+
Default Value: True
|
|
90
|
+
Types: bool
|
|
91
|
+
|
|
92
|
+
get_metrics:
|
|
93
|
+
Optional Argument.
|
|
94
|
+
Specifies whether to return metrics or not.
|
|
84
95
|
Default Value: False
|
|
96
|
+
Types: bool
|
|
85
97
|
|
|
86
98
|
RETURNS:
|
|
87
99
|
tuple containing performance metrics and predictions of specified rank ML model.
|
|
88
100
|
|
|
89
101
|
"""
|
|
90
|
-
# Setting
|
|
91
|
-
self.test_data_ind = test_data_ind
|
|
102
|
+
# Setting target column indicator
|
|
92
103
|
self.target_column_ind = target_column_ind
|
|
93
104
|
self.table_name_mapping = table_name_mapping
|
|
105
|
+
self.data_node_id = data_node_id
|
|
106
|
+
self.get_metrics = get_metrics
|
|
94
107
|
|
|
95
108
|
# Return predictions only if test data is present and target column is not present
|
|
96
109
|
return self._evaluator(rank)
|
|
@@ -114,38 +127,34 @@ class _ModelEvaluator:
|
|
|
114
127
|
"""
|
|
115
128
|
# Extracting model using rank
|
|
116
129
|
model = self.model_info.loc[rank]
|
|
130
|
+
|
|
131
|
+
ml_name = self.model_info.loc[rank]['MODEL_ID'].split('_')[0]
|
|
117
132
|
|
|
118
133
|
# Defining eval_params
|
|
119
|
-
eval_params = _ModelTraining._eval_params_generation(
|
|
134
|
+
eval_params = _ModelTraining._eval_params_generation(ml_name,
|
|
120
135
|
self.target_column,
|
|
121
136
|
self.task_type)
|
|
122
137
|
|
|
123
|
-
#
|
|
124
|
-
test = DataFrame(self.table_name_mapping['{}
|
|
125
|
-
|
|
126
|
-
# Getting test data from table
|
|
127
|
-
if not self.test_data_ind:
|
|
128
|
-
# Test Data
|
|
129
|
-
test = DataFrame(self.table_name_mapping['{}_test'.format(model['Feature-Selection'])])
|
|
130
|
-
else:
|
|
131
|
-
test = DataFrame(self.table_name_mapping['{}_new_test'.format(model['Feature-Selection'])])
|
|
132
|
-
|
|
133
|
-
print("\nFollowing model is being used for generating prediction :")
|
|
134
|
-
print("Model ID :", model['Model-ID'],
|
|
135
|
-
"\nFeature Selection Method :",model['Feature-Selection'])
|
|
138
|
+
# Extracting test data for evaluation based on data node id
|
|
139
|
+
test = DataFrame(self.table_name_mapping[self.data_node_id]['{}_new_test'.format(model['FEATURE_SELECTION'])])
|
|
136
140
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
+
print("\nFollowing model is being picked for evaluation:")
|
|
142
|
+
print("Model ID :", model['MODEL_ID'],
|
|
143
|
+
"\nFeature Selection Method :",model['FEATURE_SELECTION'])
|
|
144
|
+
|
|
145
|
+
if self.task_type.lower() == 'classification':
|
|
146
|
+
params = ast.literal_eval(model['PARAMETERS'])
|
|
147
|
+
eval_params['output_responses'] = params['output_responses']
|
|
148
|
+
|
|
149
|
+
# Mapping data according to model type
|
|
150
|
+
data_map = 'test_data' if ml_name == 'KNN' else 'newdata'
|
|
151
|
+
# Performing evaluation if get_metrics is True else returning predictions
|
|
152
|
+
if self.get_metrics:
|
|
153
|
+
metrics = model['model-obj'].evaluate(**{data_map: test}, **eval_params)
|
|
154
|
+
return metrics
|
|
141
155
|
else:
|
|
142
|
-
#
|
|
143
|
-
if
|
|
156
|
+
# Removing accumulate parameter if target column is not present
|
|
157
|
+
if not self.target_column_ind:
|
|
144
158
|
eval_params.pop("accumulate")
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
# Return both metrics and predictions for all other cases
|
|
148
|
-
metrics = model['model-obj'].evaluate(newdata=test, **eval_params)
|
|
149
|
-
pred = model['model-obj'].predict(newdata=test, **eval_params)
|
|
150
|
-
|
|
151
|
-
return (metrics, pred)
|
|
159
|
+
pred = model['model-obj'].predict(**{data_map: test}, **eval_params)
|
|
160
|
+
return pred
|
|
@@ -19,6 +19,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
19
19
|
import math
|
|
20
20
|
import pandas as pd
|
|
21
21
|
from itertools import product
|
|
22
|
+
import numpy as np
|
|
22
23
|
|
|
23
24
|
# Teradata libraries
|
|
24
25
|
from teradataml.context import context as tdmlctx
|
|
@@ -38,7 +39,8 @@ class _ModelTraining:
|
|
|
38
39
|
verbose=0,
|
|
39
40
|
features=None,
|
|
40
41
|
task_type="Regression",
|
|
41
|
-
custom_data = None
|
|
42
|
+
custom_data = None,
|
|
43
|
+
**kwargs):
|
|
42
44
|
"""
|
|
43
45
|
DESCRIPTION:
|
|
44
46
|
Function initializes the data, target column, features and models
|
|
@@ -89,6 +91,28 @@ class _ModelTraining:
|
|
|
89
91
|
Optional Argument.
|
|
90
92
|
Specifies json object containing user customized input.
|
|
91
93
|
Types: json object
|
|
94
|
+
|
|
95
|
+
**kwargs:
|
|
96
|
+
Specifies the additional arguments for model training. Below
|
|
97
|
+
are the additional arguments:
|
|
98
|
+
volatile:
|
|
99
|
+
Optional Argument.
|
|
100
|
+
Specifies whether to put the interim results of the
|
|
101
|
+
functions in a volatile table or not. When set to
|
|
102
|
+
True, results are stored in a volatile table,
|
|
103
|
+
otherwise not.
|
|
104
|
+
Default Value: False
|
|
105
|
+
Types: bool
|
|
106
|
+
|
|
107
|
+
persist:
|
|
108
|
+
Optional Argument.
|
|
109
|
+
Specifies whether to persist the interim results of the
|
|
110
|
+
functions in a table or not. When set to True,
|
|
111
|
+
results are persisted in a table; otherwise,
|
|
112
|
+
results are garbage collected at the end of the
|
|
113
|
+
session.
|
|
114
|
+
Default Value: False
|
|
115
|
+
Types: bool
|
|
92
116
|
"""
|
|
93
117
|
self.data = data
|
|
94
118
|
self.target_column = target_column
|
|
@@ -99,6 +123,8 @@ class _ModelTraining:
|
|
|
99
123
|
self.custom_data = custom_data
|
|
100
124
|
self.labels = self.data.drop_duplicate(self.target_column).size
|
|
101
125
|
self.startify_col = None
|
|
126
|
+
self.persist = kwargs.get("persist", False)
|
|
127
|
+
self.volatile = kwargs.get("volatile", False)
|
|
102
128
|
|
|
103
129
|
def model_training(self,
|
|
104
130
|
auto=True,
|
|
@@ -278,20 +304,25 @@ class _ModelTraining:
|
|
|
278
304
|
"""
|
|
279
305
|
# Creating a copy to avoid use of same reference of memory
|
|
280
306
|
if self.task_type != "Regression":
|
|
281
|
-
sorted_model_df = trained_models_info.sort_values(by=['
|
|
282
|
-
|
|
307
|
+
sorted_model_df = trained_models_info.sort_values(by=['MICRO-F1', 'WEIGHTED-F1'],
|
|
308
|
+
ascending=[False, False]).reset_index(drop=True)
|
|
283
309
|
else:
|
|
284
|
-
sorted_model_df = trained_models_info.sort_values(by='R2
|
|
285
|
-
|
|
310
|
+
sorted_model_df = trained_models_info.sort_values(by='R2',
|
|
311
|
+
ascending=False).reset_index(drop=True)
|
|
312
|
+
|
|
286
313
|
|
|
287
314
|
# Adding rank to leaderboard
|
|
288
|
-
sorted_model_df.insert(0, '
|
|
315
|
+
sorted_model_df.insert(0, 'RANK', sorted_model_df.index + 1)
|
|
316
|
+
|
|
317
|
+
# Internal Data list for leaderboard
|
|
318
|
+
dp_lst = ["model-obj", "DATA_TABLE", "RESULT_TABLE", "PARAMETERS"]
|
|
289
319
|
|
|
290
320
|
# Excluding the model object and model name from leaderboard
|
|
291
|
-
leaderboard = sorted_model_df.drop(
|
|
321
|
+
leaderboard = sorted_model_df.drop(dp_lst, axis=1)
|
|
322
|
+
|
|
292
323
|
# filtering the rows based on the max_models
|
|
293
324
|
if self.max_models is not None:
|
|
294
|
-
leaderboard = leaderboard[leaderboard["
|
|
325
|
+
leaderboard = leaderboard[leaderboard["RANK"] <= self.max_models]
|
|
295
326
|
|
|
296
327
|
self._display_msg(msg="Leaderboard",
|
|
297
328
|
progress_bar=self.progress_bar,
|
|
@@ -436,28 +467,24 @@ class _ModelTraining:
|
|
|
436
467
|
max_depth.extend([6, 7, 8])
|
|
437
468
|
min_node_size.extend([2])
|
|
438
469
|
iter_num.extend([20])
|
|
439
|
-
num_trees.extend([10, 20])
|
|
440
470
|
elif num_rows < 10000 and num_cols < 15:
|
|
441
471
|
min_impurity.extend([0.1, 0.2])
|
|
442
472
|
shrinkage_factor.extend([0.1, 0.3])
|
|
443
473
|
max_depth.extend([6, 8, 10])
|
|
444
474
|
min_node_size.extend([2, 3])
|
|
445
475
|
iter_num.extend([20, 30])
|
|
446
|
-
num_trees.extend([20, 30])
|
|
447
476
|
elif num_rows < 100000 and num_cols < 20:
|
|
448
477
|
min_impurity.extend([0.2, 0.3])
|
|
449
478
|
shrinkage_factor.extend([0.01, 0.1, 0.2])
|
|
450
479
|
max_depth.extend([4, 6, 7])
|
|
451
480
|
min_node_size.extend([3, 4])
|
|
452
481
|
iter_num.extend([30, 40])
|
|
453
|
-
num_trees.extend([30, 40])
|
|
454
482
|
else:
|
|
455
483
|
min_impurity.extend([0.1, 0.2, 0.3])
|
|
456
484
|
shrinkage_factor.extend([0.01, 0.05, 0.1])
|
|
457
485
|
max_depth.extend([3, 4, 7, 8])
|
|
458
486
|
min_node_size.extend([2, 3, 4])
|
|
459
487
|
iter_num.extend([20, 30, 40])
|
|
460
|
-
num_trees.extend([20, 30, 40])
|
|
461
488
|
|
|
462
489
|
# Hyperparameters for XGBoost model
|
|
463
490
|
xgb_params = {
|
|
@@ -736,12 +763,15 @@ class _ModelTraining:
|
|
|
736
763
|
|
|
737
764
|
# Hyperparameters for each model
|
|
738
765
|
model_params = parameters[:min(len(parameters), 5)]
|
|
739
|
-
self._display_msg(msg="\nPerforming
|
|
766
|
+
self._display_msg(msg="\nPerforming hyperparameter tuning ...", progress_bar=self.progress_bar)
|
|
740
767
|
|
|
741
|
-
# Defining training
|
|
768
|
+
# Defining training data
|
|
742
769
|
data_types = ['lasso', 'rfe', 'pca']
|
|
743
770
|
trainng_datas = tuple(DataFrame(self.table_name_mapping[f'{data_type}_train']) for data_type in data_types)
|
|
744
|
-
|
|
771
|
+
|
|
772
|
+
if self.task_type == "Classification":
|
|
773
|
+
response_values = trainng_datas[0].get(self.target_column).drop_duplicate().get_values().flatten().tolist()
|
|
774
|
+
self.output_response = [str(i) for i in response_values]
|
|
745
775
|
|
|
746
776
|
if self.stopping_metric is None:
|
|
747
777
|
self.stopping_tolerance, self.stopping_metric = 1.0, 'MICRO-F1' \
|
|
@@ -765,115 +795,15 @@ class _ModelTraining:
|
|
|
765
795
|
|
|
766
796
|
trained_models = []
|
|
767
797
|
for param in model_params:
|
|
768
|
-
result = self._hyperparameter_tunning(param, trainng_datas
|
|
798
|
+
result = self._hyperparameter_tunning(param, trainng_datas)
|
|
769
799
|
trained_models.append(result)
|
|
770
800
|
|
|
771
801
|
models_df = pd.concat(trained_models, ignore_index=True)
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
trained_models_info = self._model_scoring(testing_datas, models_df)
|
|
775
|
-
trained_models_info = trained_models_info.reset_index(drop=True)
|
|
776
|
-
|
|
777
|
-
return trained_models_info
|
|
778
|
-
|
|
779
|
-
def _model_scoring(self,
|
|
780
|
-
test_data,
|
|
781
|
-
model_info):
|
|
782
|
-
"""
|
|
783
|
-
DESCRIPTION:
|
|
784
|
-
Internal function generates the performance metrics for
|
|
785
|
-
trained ML models using testing dataset.
|
|
786
|
-
|
|
787
|
-
PARAMETERS:
|
|
788
|
-
test_data
|
|
789
|
-
Required Argument.
|
|
790
|
-
Specifies the testing datasets
|
|
791
|
-
Types: tuple of Teradataml DataFrame
|
|
792
|
-
|
|
793
|
-
model_info
|
|
794
|
-
Required Argument.
|
|
795
|
-
Specifies the trained models information.
|
|
796
|
-
Types: Pandas DataFrame
|
|
797
|
-
|
|
798
|
-
RETURNS:
|
|
799
|
-
Pandas DataFrame containing, trained models with their performance metrics.
|
|
800
|
-
"""
|
|
801
|
-
self._display_msg(msg="Evaluating models performance ...",
|
|
802
|
-
progress_bar = self.progress_bar,
|
|
803
|
-
show_data=True)
|
|
804
|
-
# Empty list for storing model performance metrics
|
|
805
|
-
model_performance_data = []
|
|
806
|
-
|
|
807
|
-
# Mapping feature selection methods to corresponding test data
|
|
808
|
-
feature_selection_to_test_data = {"lasso": test_data[0],
|
|
809
|
-
"rfe": test_data[1],
|
|
810
|
-
"pca": test_data[2]}
|
|
811
|
-
|
|
812
|
-
# Iterating over models
|
|
813
|
-
for index, model_row in model_info.iterrows():
|
|
814
|
-
# Extracting model name, model id, feature selection method, and model object
|
|
815
|
-
model_name, model_id, feature_selection, model_object = model_row['Name'], \
|
|
816
|
-
model_row['Model-ID'], model_row['Feature-Selection'], model_row['obj']
|
|
817
|
-
|
|
818
|
-
# Selecting test data based on feature selection method
|
|
819
|
-
test_set = feature_selection_to_test_data[feature_selection]
|
|
820
|
-
|
|
821
|
-
# Model evaluation
|
|
822
|
-
if model_name == 'knn':
|
|
823
|
-
performance_metrics = model_object.evaluate(test_data=test_set)
|
|
824
|
-
else:
|
|
825
|
-
eval_params = _ModelTraining._eval_params_generation(model_name,
|
|
826
|
-
self.target_column,
|
|
827
|
-
self.task_type)
|
|
828
|
-
performance_metrics = model_object.evaluate(newdata=test_set, **eval_params)
|
|
829
|
-
|
|
830
|
-
# Extracting performance metrics
|
|
831
|
-
if self.is_classification_type():
|
|
832
|
-
# Classification
|
|
833
|
-
# Extract performance metrics from the output data
|
|
834
|
-
performance_metrics_list = [metric[2] for metric in performance_metrics.output_data.itertuples()]
|
|
835
|
-
|
|
836
|
-
# Combine all the elements to form a new row
|
|
837
|
-
new_row = [model_name, model_id, feature_selection] + performance_metrics_list + [model_object]
|
|
838
|
-
else:
|
|
839
|
-
# Regression
|
|
840
|
-
regression_metrics = next(performance_metrics.result.itertuples())
|
|
841
|
-
sample_size = test_set.select('id').size
|
|
842
|
-
feature_count = len(test_set.columns) - 2
|
|
843
|
-
r2_score = regression_metrics[8]
|
|
844
|
-
adjusted_r2_score = 1 - ((1 - r2_score) * (sample_size - 1) / (sample_size - feature_count - 1))
|
|
845
|
-
new_row = [model_name, model_id, feature_selection, regression_metrics[0],
|
|
846
|
-
regression_metrics[1], regression_metrics[2], regression_metrics[5],
|
|
847
|
-
regression_metrics[6], r2_score, adjusted_r2_score, model_object]
|
|
848
|
-
|
|
849
|
-
model_performance_data.append(new_row)
|
|
850
|
-
|
|
851
|
-
if self.is_classification_type():
|
|
852
|
-
model_metrics_df = pd.DataFrame(model_performance_data, columns=['Name','Model-ID',
|
|
853
|
-
'Feature-Selection','Accuracy','Micro-Precision',
|
|
854
|
-
'Micro-Recall','Micro-F1',
|
|
855
|
-
'Macro-Precision','Macro-Recall',
|
|
856
|
-
'Macro-F1','Weighted-Precision',
|
|
857
|
-
'Weighted-Recall','Weighted-F1',
|
|
858
|
-
'model-obj'])
|
|
859
|
-
else:
|
|
860
|
-
model_metrics_df = pd.DataFrame(model_performance_data, columns=['Name', 'Model-ID',
|
|
861
|
-
'Feature-Selection',
|
|
862
|
-
'MAE', 'MSE', 'MSLE',
|
|
863
|
-
'RMSE', 'RMSLE',
|
|
864
|
-
'R2-score',
|
|
865
|
-
'Adjusted R2-score',
|
|
866
|
-
'model-obj'])
|
|
867
|
-
self._display_msg(msg="Evaluation completed.",
|
|
868
|
-
progress_bar = self.progress_bar,
|
|
869
|
-
show_data=True)
|
|
870
|
-
|
|
871
|
-
return model_metrics_df
|
|
872
|
-
|
|
802
|
+
return models_df
|
|
803
|
+
|
|
873
804
|
def _hyperparameter_tunning(self,
|
|
874
805
|
model_param,
|
|
875
|
-
train_data
|
|
876
|
-
test_data):
|
|
806
|
+
train_data):
|
|
877
807
|
"""
|
|
878
808
|
DESCRIPTION:
|
|
879
809
|
Internal function performs hyperparameter tuning on
|
|
@@ -890,11 +820,6 @@ class _ModelTraining:
|
|
|
890
820
|
Specifies the training datasets.
|
|
891
821
|
Types: tuple of Teradataml DataFrame
|
|
892
822
|
|
|
893
|
-
test_data
|
|
894
|
-
Required Argument.
|
|
895
|
-
Specifies the testing datasets
|
|
896
|
-
Types: tuple of Teradataml DataFrame
|
|
897
|
-
|
|
898
823
|
RETURNS:
|
|
899
824
|
pandas DataFrame containing, trained models information.
|
|
900
825
|
"""
|
|
@@ -910,13 +835,21 @@ class _ModelTraining:
|
|
|
910
835
|
# Input columns for model
|
|
911
836
|
model_param['input_columns'] = self.features
|
|
912
837
|
|
|
838
|
+
# Setting persist for model
|
|
839
|
+
model_param['persist'] = self.persist
|
|
840
|
+
|
|
913
841
|
self._display_msg(msg=model_param['name'],
|
|
914
842
|
progress_bar=self.progress_bar,
|
|
915
843
|
show_data=True)
|
|
916
844
|
|
|
917
|
-
#
|
|
845
|
+
# As we are using the entire data for HPT training,
|
|
846
|
+
# passing prepared training data as test_data for KNN.
|
|
918
847
|
if model_param['name'] == 'knn':
|
|
919
|
-
model_param['test_data'] =
|
|
848
|
+
model_param['test_data'] = train_data
|
|
849
|
+
|
|
850
|
+
if self.task_type == "Classification":
|
|
851
|
+
model_param['output_prob'] = True
|
|
852
|
+
model_param['output_responses'] = self.output_response
|
|
920
853
|
|
|
921
854
|
# Using RandomSearch for hyperparameter tuning when max_models is given.
|
|
922
855
|
# Otherwise, using GridSearch for hyperparameter tuning.
|
|
@@ -951,20 +884,35 @@ class _ModelTraining:
|
|
|
951
884
|
sample_id_column='id',stratify_column=self.startify_col, verbose=verbose, max_time=self.max_runtime_secs)
|
|
952
885
|
|
|
953
886
|
# Getting all passed models
|
|
954
|
-
|
|
887
|
+
model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
|
|
888
|
+
on='MODEL_ID', how='inner')
|
|
955
889
|
# Creating mapping data ID to feature selection method
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
#
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
890
|
+
data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
|
|
891
|
+
"DF_1": ('rfe', train_data[1]._table_name),
|
|
892
|
+
"DF_2": ('pca', train_data[2]._table_name)}
|
|
893
|
+
|
|
894
|
+
# Updating model stats with feature selection method and result table
|
|
895
|
+
for index, row in model_info.iterrows():
|
|
896
|
+
model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
|
|
897
|
+
model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
|
|
898
|
+
model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
|
|
899
|
+
model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
|
|
900
|
+
|
|
901
|
+
# Dropping column 'DATA_ID'
|
|
902
|
+
model_info.drop(['DATA_ID'], axis=1, inplace=True)
|
|
903
|
+
|
|
904
|
+
model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
|
|
905
|
+
|
|
906
|
+
if not self.is_classification_type():
|
|
907
|
+
# Calculating Adjusted-R2 for regression
|
|
908
|
+
# Getting size and feature count for each feature selection method
|
|
909
|
+
methods = ["lasso", "rfe", "pca"]
|
|
910
|
+
size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
|
|
911
|
+
feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
|
|
912
|
+
model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
|
|
913
|
+
1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
|
|
914
|
+
(size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
|
|
915
|
+
|
|
968
916
|
self._display_msg(msg="-"*100,
|
|
969
917
|
progress_bar=self.progress_bar,
|
|
970
918
|
show_data=True)
|
|
@@ -1006,21 +954,36 @@ class _ModelTraining:
|
|
|
1006
954
|
# Setting the eval_params
|
|
1007
955
|
eval_params = {"id_column": "id",
|
|
1008
956
|
"accumulate": target_column}
|
|
957
|
+
|
|
958
|
+
model_type = {
|
|
959
|
+
'xgboost': 'model_type',
|
|
960
|
+
'glm': 'model_type',
|
|
961
|
+
'decisionforest': 'tree_type',
|
|
962
|
+
'svm': 'model_type',
|
|
963
|
+
'knn': 'model_type'
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
ml_name = ml_name.replace('_', '').lower()
|
|
1009
967
|
|
|
1010
968
|
# For Classification
|
|
1011
969
|
if task_type.lower() != "regression":
|
|
970
|
+
eval_params[model_type[ml_name]] = 'Classification'
|
|
971
|
+
eval_params['output_prob'] = True
|
|
972
|
+
|
|
1012
973
|
if ml_name == 'xgboost':
|
|
1013
|
-
eval_params['model_type'] = 'Classification'
|
|
1014
974
|
eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter','class_num', 'tree_order']
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
eval_params['output_prob'] = True
|
|
975
|
+
|
|
976
|
+
elif ml_name == 'glm':
|
|
977
|
+
eval_params['family'] = 'BINOMIAL'
|
|
978
|
+
|
|
1020
979
|
else:
|
|
1021
980
|
# For Regression
|
|
981
|
+
eval_params[model_type[ml_name]] = 'Regression'
|
|
982
|
+
|
|
1022
983
|
if ml_name == 'xgboost':
|
|
1023
|
-
eval_params['model_type'] = 'Regression'
|
|
1024
984
|
eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter', 'tree_order']
|
|
985
|
+
|
|
986
|
+
elif ml_name == 'glm':
|
|
987
|
+
eval_params['family'] = 'GAUSSIAN'
|
|
1025
988
|
|
|
1026
989
|
return eval_params
|
teradataml/catalog/byom.py
CHANGED
|
@@ -26,7 +26,7 @@ from teradataml.options.display import display
|
|
|
26
26
|
from teradataml.common.constants import ModelCatalogingConstants as mac
|
|
27
27
|
from teradataml.options.configure import configure
|
|
28
28
|
from teradataml.utils.utils import execute_sql
|
|
29
|
-
from
|
|
29
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
30
30
|
|
|
31
31
|
validator = _Validators()
|
|
32
32
|
|
|
@@ -541,13 +541,12 @@ def save_byom(model_id,
|
|
|
541
541
|
# If exists, extract required information about table columns types
|
|
542
542
|
# else extract from additional_columns_types.
|
|
543
543
|
# Also validate model_id against allowed length.
|
|
544
|
-
table_exists = connection.dialect.has_table(connection, table_name=table_name, schema=schema_name)
|
|
544
|
+
table_exists = connection.dialect.has_table(connection, table_name=table_name,
|
|
545
|
+
schema=schema_name, table_only=True)
|
|
545
546
|
if table_exists:
|
|
546
547
|
# Check if model exists or not. If exists, raise error.
|
|
547
548
|
__check_if_model_exists(
|
|
548
549
|
model_id, table_name, schema_name, raise_error_if_model_found=True)
|
|
549
|
-
if len(additional_columns_types) != 0:
|
|
550
|
-
warnings.warn("Argument additional_columns_types is ignored since table already exists.", stacklevel=2)
|
|
551
550
|
|
|
552
551
|
# Gather column name and type information from existing table
|
|
553
552
|
existing_table_df = DataFrame(in_schema(schema_name, table_name))
|
|
@@ -807,7 +806,7 @@ def delete_byom(model_id, table_name=None, schema_name=None):
|
|
|
807
806
|
|
|
808
807
|
# Before proceed further, check whether table exists or not.
|
|
809
808
|
conn = get_connection()
|
|
810
|
-
if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name):
|
|
809
|
+
if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name, table_only=True):
|
|
811
810
|
error_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
|
|
812
811
|
error_msg = Messages.get_message(
|
|
813
812
|
error_code, "delete", 'Table "{}.{}" does not exist.'.format(schema_name, table_name))
|
|
@@ -1472,7 +1471,7 @@ def retrieve_byom(model_id,
|
|
|
1472
1471
|
|
|
1473
1472
|
# Before proceeding further, check whether table exists or not.
|
|
1474
1473
|
conn = get_connection()
|
|
1475
|
-
if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name):
|
|
1474
|
+
if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name, table_only=True):
|
|
1476
1475
|
error_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
|
|
1477
1476
|
error_msg = Messages.get_message(
|
|
1478
1477
|
error_code, "retrieve", 'Table "{}.{}" does not exist.'.format(schema_name, table_name))
|
|
@@ -1535,7 +1534,8 @@ def retrieve_byom(model_id,
|
|
|
1535
1534
|
license_table = in_schema(license_schema_name, license_table_name)
|
|
1536
1535
|
|
|
1537
1536
|
# Check whether license table exists or not before proceed further.
|
|
1538
|
-
if not conn.dialect.has_table(conn, table_name=license_table_name, schema=license_schema_name):
|
|
1537
|
+
if not conn.dialect.has_table(conn, table_name=license_table_name, schema=license_schema_name,
|
|
1538
|
+
table_only=True):
|
|
1539
1539
|
error_code = MessageCodes.EXECUTION_FAILED
|
|
1540
1540
|
error_msg = Messages.get_message(
|
|
1541
1541
|
error_code, "retrieve the model", 'Table "{}" does not exist.'.format(license_table))
|
|
@@ -1723,7 +1723,7 @@ def list_byom(table_name=None, schema_name=None, model_id=None):
|
|
|
1723
1723
|
|
|
1724
1724
|
# Before proceeding further, check whether table exists or not.
|
|
1725
1725
|
conn = get_connection()
|
|
1726
|
-
if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name):
|
|
1726
|
+
if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name, table_only=True):
|
|
1727
1727
|
error_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
|
|
1728
1728
|
error_msg = Messages.get_message(
|
|
1729
1729
|
error_code, "list", 'Table "{}.{}" does not exist.'.format(schema_name, table_name))
|
|
@@ -425,7 +425,7 @@ class _DAWorkflow:
|
|
|
425
425
|
"""
|
|
426
426
|
device_cfg = requests.post(
|
|
427
427
|
url=self.device_auth_end_point,
|
|
428
|
-
data={'client_id': self.__client_id})
|
|
428
|
+
data={'client_id': self.__client_id, 'scope': 'openid'})
|
|
429
429
|
|
|
430
430
|
# Check the status. If response is not 200, raise error.
|
|
431
431
|
_Validators._validate_http_response(device_cfg, 200, "get the device metadata")
|
teradataml/common/constants.py
CHANGED
|
@@ -425,6 +425,8 @@ class TableOperatorConstants(Enum):
|
|
|
425
425
|
MAP_TEMPLATE = "dataframe_map.template"
|
|
426
426
|
# Template of the intermediate script that will be generated.
|
|
427
427
|
APPLY_TEMPLATE = "dataframe_apply.template"
|
|
428
|
+
# Template of the intermediate script that will be generated for UDF.
|
|
429
|
+
UDF_TEMPLATE = "dataframe_udf.template"
|
|
428
430
|
# In-DB execution mode.
|
|
429
431
|
INDB_EXEC = "IN-DB"
|
|
430
432
|
# Local execution mode.
|
|
@@ -439,6 +441,8 @@ class TableOperatorConstants(Enum):
|
|
|
439
441
|
MAP_PARTITION_OP = "map_partition"
|
|
440
442
|
# apply operation.
|
|
441
443
|
APPLY_OP = "apply"
|
|
444
|
+
# udf operation.
|
|
445
|
+
UDF_OP = "udf"
|
|
442
446
|
# Template of the script_executor that will be used to generate the temporary script_executor file.
|
|
443
447
|
SCRIPT_TEMPLATE = "script_executor.template"
|
|
444
448
|
# Log Type.
|
|
@@ -1369,6 +1373,7 @@ class TeradataAnalyticFunctionTypes(Enum):
|
|
|
1369
1373
|
UAF = "UAF"
|
|
1370
1374
|
TABLEOPERATOR = "TABLE_OPERATOR"
|
|
1371
1375
|
BYOM = "BYOM"
|
|
1376
|
+
STORED_PROCEDURE = "STORED_PROCEDURE"
|
|
1372
1377
|
|
|
1373
1378
|
|
|
1374
1379
|
class TeradataAnalyticFunctionInfo(Enum):
|
|
@@ -1379,6 +1384,8 @@ class TeradataAnalyticFunctionInfo(Enum):
|
|
|
1379
1384
|
TABLE_OPERATOR = {"func_type": "tableoperator", "lowest_version": "17.00 ",
|
|
1380
1385
|
"display_function_type_name" :"TABLE OPERATOR"}
|
|
1381
1386
|
BYOM = {"func_type": "byom", "lowest_version": None, "display_function_type_name": "BYOM"}
|
|
1387
|
+
STORED_PROCEDURE = {"func_type": "storedprocedure", "lowest_version": "17.20", "display_function_type_name": "UAF",
|
|
1388
|
+
"metadata_class" : "_AnlyFuncMetadataUAF"}
|
|
1382
1389
|
|
|
1383
1390
|
class TeradataUAFSpecificArgs(Enum):
|
|
1384
1391
|
INPUT_MODE = "input_mode"
|
|
@@ -1436,3 +1443,33 @@ class CloudProvider(Enum):
|
|
|
1436
1443
|
# and '2018-03-28', using the latest one.
|
|
1437
1444
|
X_MS_VERSION = "2019-12-12"
|
|
1438
1445
|
X_MS_BLOB_TYPE = "BlockBlob"
|
|
1446
|
+
|
|
1447
|
+
class SessionParamsSQL:
|
|
1448
|
+
# Holds the SQL Statements for Session params.
|
|
1449
|
+
TIMEZONE = "SET TIME ZONE {}"
|
|
1450
|
+
ACCOUNT = "SET SESSION ACCOUNT = '{}' FOR {}"
|
|
1451
|
+
CALENDAR = "SET SESSION CALENDAR = {}"
|
|
1452
|
+
CHARACTER_SET_UNICODE = "SET SESSION CHARACTER SET UNICODE PASS THROUGH {}"
|
|
1453
|
+
COLLATION = "SET SESSION COLLATION {}"
|
|
1454
|
+
CONSTRAINT = "SET SESSION CONSTRAINT = {}"
|
|
1455
|
+
DATABASE = "SET SESSION DATABASE {}"
|
|
1456
|
+
DATEFORM = "SET SESSION DATEFORM = {}"
|
|
1457
|
+
DEBUG_FUNCTION = "SET SESSION DEBUG FUNCTION {} {}"
|
|
1458
|
+
DOT_NOTATION = "SET SESSION DOT NOTATION {} ON ERROR"
|
|
1459
|
+
ISOLATED_LOADING = "SET SESSION FOR {} ISOLATED LOADING"
|
|
1460
|
+
FUNCTION_TRACE = "SET SESSION FUNCTION TRACE USING {} FOR TABLE {}"
|
|
1461
|
+
JSON_IGNORE_ERRORS = "SET SESSION JSON IGNORE ERRORS {}"
|
|
1462
|
+
SEARCHUIFDBPATH = "SET SESSION SEARCHUIFDBPATH = {}"
|
|
1463
|
+
TRANSACTION_ISOLATION_LEVEL = "SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL {}"
|
|
1464
|
+
QUERY_BAND = "SET QUERY_BAND = {} FOR {}"
|
|
1465
|
+
UDFSEARCHPATH = "SET SESSION UDFSEARCHPATH = {} FOR FUNCTION = {}"
|
|
1466
|
+
|
|
1467
|
+
class SessionParamsPythonNames:
|
|
1468
|
+
# Holds the SQL Statements for Session params.
|
|
1469
|
+
TIMEZONE = "Session Time Zone"
|
|
1470
|
+
ACCOUNT = "Account Name"
|
|
1471
|
+
CALENDAR = "Calendar"
|
|
1472
|
+
COLLATION = "Collation"
|
|
1473
|
+
DATABASE = "Current DataBase"
|
|
1474
|
+
DATEFORM = 'Current DateForm'
|
|
1475
|
+
|