teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +315 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +95 -8
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +5 -1
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +59 -35
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +27 -12
- teradataml/automl/model_training.py +73 -46
- teradataml/common/constants.py +88 -29
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +19 -3
- teradataml/common/messages.py +6 -1
- teradataml/common/sqlbundle.py +64 -12
- teradataml/common/utils.py +246 -47
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +161 -27
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +1049 -285
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +578 -35
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +185 -16
- teradataml/dbutils/dbutils.py +1049 -115
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/_base.py +1466 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
- teradataml/options/__init__.py +54 -38
- teradataml/options/configure.py +131 -27
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +5 -5
- teradataml/scriptmgmt/lls_utils.py +130 -40
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2318 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +51 -2
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +99 -8
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_class.py +0 -255
- teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
teradataml/dbutils/dbutils.py
CHANGED
@@ -9,35 +9,45 @@ teradataml db utilities
 ----------
 A teradataml database utility functions provide interface to Teradata Vantage common tasks such as drop_table, drop_view, create_table etc.
 """
-import concurrent.futures
+import concurrent.futures
+import json
+import os
+import re
+import shutil
+import tempfile
 from datetime import datetime
+
 import pandas as pd
+from sqlalchemy import (CheckConstraint, Column, ForeignKeyConstraint,
+                        MetaData, PrimaryKeyConstraint, Table,
+                        UniqueConstraint)
 from sqlalchemy.sql.functions import Function
-from …
-from …
-from …
-from …
+from teradatasql import OperationalError
+from teradatasqlalchemy.dialect import TDCreateTablePost as post
+from teradatasqlalchemy.dialect import dialect as td_dialect
+from teradatasqlalchemy.dialect import preparer
+
+import teradataml.dataframe as tdmldf
+from teradataml.common.constants import (SessionParamsPythonNames,
+                                         SessionParamsSQL, SQLConstants,
+                                         TableOperatorConstants,
+                                         TeradataTableKindConstants)
 from teradataml.common.exceptions import TeradataMlException
-from teradataml.common.…
+from teradataml.common.messagecodes import MessageCodes
+from teradataml.common.messages import Messages
 from teradataml.common.sqlbundle import SQLBundle
-from teradataml.common.…
-from teradataml.…
-import teradataml.dataframe as tdmldf
+from teradataml.common.utils import UtilFuncs
+from teradataml.context import context as tdmlctx
 from teradataml.options.configure import configure
-from teradataml.utils.utils import execute_sql
-from teradataml.utils.validators import _Validators
-from teradataml.utils.internal_buffer import _InternalBuffer
-from teradatasql import OperationalError
-from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
-from teradatasqlalchemy.dialect import TDCreateTablePost as post
 from teradataml.telemetry_utils.queryband import collect_queryband
-from sqlalchemy import Table, Column, MetaData, CheckConstraint, \
-    PrimaryKeyConstraint, ForeignKeyConstraint, UniqueConstraint
 from teradataml.utils.internal_buffer import _InternalBuffer
+from teradataml.utils.utils import execute_sql
+from teradataml.utils.validators import _Validators
 
 
 @collect_queryband(queryband='DrpTbl')
-def db_drop_table(table_name, schema_name=None):
+def db_drop_table(table_name, schema_name=None, suppress_error=False,
+                  datalake_name=None, purge=None):
     """
     DESCRIPTION:
         Drops the table from the given schema.
@@ -55,6 +65,28 @@ def db_drop_table(table_name, schema_name=None):
             Default Value: None
             Types: str
 
+        suppress_error:
+            Optional Argument
+            Specifies whether to raise error or not.
+            Default Value: False
+            Types: str
+
+        datalake_name:
+            Optional Argument
+            Specifies name of the datalake to drop table from.
+            Note:
+                "schema_name" must be provided while using this argument.
+            Default Value: None
+            Types: str
+
+        purge:
+            Optional Argument
+            Specifies whether to use purge clause or not while dropping datalake table.
+            It is only applicable when "datalake_name" argument is used. When "datalake_name" is specified,
+            but "purge" is not specified, data is purged by default.
+            Default Value: None
+            Types: bool
+
     RETURNS:
         True - if the operation is successful.
 
@@ -64,33 +96,56 @@ def db_drop_table(table_name, schema_name=None):
     EXAMPLES:
         >>> load_example_data("dataframe", "admissions_train")
 
-        # Drop table in current database
+        # Example 1: Drop table in current database.
         >>> db_drop_table(table_name = 'admissions_train')
 
-        # Drop table from the given schema
+        # Example 2: Drop table from the given schema.
         >>> db_drop_table(table_name = 'admissions_train', schema_name = 'alice')
+
+        #Example 3: Drop a table from datalake and purge the data.
+        >>> db_drop_table(table_name = 'datalake_table', schema_name = 'datalake_db',
+        ...               datalake_name='datalake', purge=True)
+
     """
     # Argument validations
     awu_matrix = []
     awu_matrix.append(["schema_name", schema_name, True, (str), True])
     awu_matrix.append(["table_name", table_name, False, (str), True])
-
+    awu_matrix.append(["datalake_name", datalake_name, True, (str), True])
+    awu_matrix.append(["purge", purge, True, (bool, type(None)), True])
    # Validate argument types
    _Validators._validate_function_arguments(awu_matrix)
 
+    # Process datalake related arguments.
+    purge_clause = None
+    if datalake_name is not None:
+        if schema_name is None:
+            err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "schema_name",
+                                        "datalake_name")
+            raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
+
+        if purge is False:
+            purge_clause = "NO PURGE"
+        else:
+            purge_clause = "PURGE ALL"
+
    # Joining view and schema names in the format "schema_name"."view_name"
-    table_name = _get_quoted_object_name(schema_name, table_name)
+    table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
 
    try:
-        return UtilFuncs._drop_table(table_name)
-    except TeradataMlException:
-        …
-        …
-        …
+        return UtilFuncs._drop_table(table_name, purge_clause=purge_clause)
+    except (TeradataMlException, OperationalError):
+        if suppress_error:
+            pass
+        else:
+            raise
    except Exception as err:
-        …
-        …
-        …
+        if suppress_error:
+            pass
+        else:
+            raise TeradataMlException(Messages.get_message(MessageCodes.DROP_FAILED, "table",
+                                                           table_name),
+                                      MessageCodes.DROP_FAILED) from err
 
 
 @collect_queryband(queryband='DrpVw')
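The new suppress_error flag turns drop failures (both TeradataMlException and the driver's OperationalError) into silent no-ops, which suits idempotent cleanup. A minimal sketch of that usage, with hypothetical staging table names:

    # Hypothetical cleanup at the start of a load script: the staging tables
    # may or may not exist yet, so DROP failures are swallowed instead of raised.
    from teradataml import db_drop_table

    for staging_table in ("stg_orders", "stg_customers"):
        db_drop_table(table_name=staging_table, suppress_error=True)

Note the purge default for datalake tables in the hunk above: omitting purge while passing datalake_name generates PURGE ALL, so pass purge=False explicitly to keep the data files.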
@@ -152,7 +207,7 @@ def db_drop_view(view_name, schema_name=None):
 
 
 @collect_queryband(queryband='LstTbls')
-def db_list_tables(schema_name=None, object_name=None, object_type='all'):
+def db_list_tables(schema_name=None, object_name=None, object_type='all', datalake_name=None):
     """
     DESCRIPTION:
         Lists the Vantage objects(table/view) names for the specified schema name.
@@ -169,10 +224,12 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
            Optional Argument.
            Specifies a table/view name or pattern to be used for filtering them from the database.
            Pattern may contain '%' or '_' as pattern matching characters.
-            …
-            …
-            …
-            …
+            - '%' represents any string of zero or more arbitrary characters. Any string of characters is acceptable as
+              a replacement for the percent.
+            - '_' represents exactly one arbitrary character. Any single character is acceptable in the position in
+              which the underscore character appears.
+            Note:
+                * If '%' is specified in 'object_name', then the '_' character is not evaluated for an arbitrary character.
            Default Value: None
            Types: str
            Example:
@@ -191,6 +248,14 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
            Default Value: 'all'
            Types: str
 
+        datalake_name:
+            Optional Argument.
+            Specifies the name of datalake to list tables from.
+            Note:
+                "schema_name" must be provided while using this argument.
+            Default Value: None
+            Types: str
+
    RETURNS:
        Pandas DataFrame
 
@@ -199,38 +264,40 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
        OperationalError - If any errors are raised from Vantage.
 
    EXAMPLES:
-        # Example 1
+        # Example 1: List all object types in the default schema
        >>> load_example_data("dataframe", "admissions_train")
        >>> db_list_tables()
 
-        # Example 2
+        # Example 2: List all the views in the default schema
        >>> execute_sql("create view temporary_view as (select 1 as dummy_col1, 2 as dummy_col2);")
        >>> db_list_tables(None , None, 'view')
 
-        # Example 3
-        # …
+        # Example 3: List all the object types in the default schema whose names begin with 'abc' followed by any number
+        # of characters in the end.
        >>> execute_sql("create view abcd123 as (select 1 as dummy_col1, 2 as dummy_col2);")
-        >>> db_list_tables(None, '…
+        >>> db_list_tables(None, 'abc%', None)
 
-        # Example 4
-        # …
+        # Example 4: List all the tables in the default schema whose names begin with 'adm' followed by any number of
+        # characters and ends with 'train'.
        >>> load_example_data("dataframe", "admissions_train")
-        >>> db_list_tables(None, '…
+        >>> db_list_tables(None, 'adm%train', 'table')
 
-        # Example 5
+        # Example 5: List all the views in the default schema whose names begin with any character but ends with 'abc'
        >>> execute_sql("create view view_abc as (select 1 as dummy_col1, 2 as dummy_col2);")
        >>> db_list_tables(None, '%abc', 'view')
 
-        # Example 6
+        # Example 6: List all the volatile tables in the default schema whose names begin with 'abc' and ends with any
        # arbitrary character and has a length of 4
        >>> execute_sql("CREATE volatile TABLE abcd(col0 int, col1 float) NO PRIMARY INDEX;")
        >>> db_list_tables(None, 'abc_', 'volatile')
 
-        # Example 7
+        # Example 7: List all the temporary objects created by teradataml in the default schema whose names begins and
        # ends with any number of arbitrary characters but contains 'filter' in between.
        >>> db_list_tables(None, '%filter%', 'temp')
-    """
 
+        # Example 8: List all the tables in datalake's database.
+        >>> db_list_tables(schema_name='datalake_db_name', datalake_name='datalake_name')
+    """
    if tdmlctx.get_connection() is None:
        raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_CONTEXT_CONNECTION),
                                  MessageCodes.INVALID_CONTEXT_CONNECTION)
@@ -245,12 +312,18 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
                              TeradataTableKindConstants.VOLATILE.value,
                              TeradataTableKindConstants.TEMP.value]
    awu_matrix.append(["object_type", object_type, True, (str), True, permitted_object_types])
-
+    awu_matrix.append(["datalake_name", datalake_name, True, (str), True])
    # Validate argument types
    _Validators._validate_function_arguments(awu_matrix)
 
+    # 'schema_name' must be provided while using 'datalake_name'.
+    _Validators._validate_dependent_argument(dependent_arg='datalake_name',
+                                             dependent_arg_value=datalake_name,
+                                             independent_arg='schema_name',
+                                             independent_arg_value=schema_name)
+
    try:
-        return _get_select_table_kind(schema_name, object_name, object_type)
+        return _get_select_table_kind(schema_name, object_name, object_type, datalake_name)
    except TeradataMlException:
        raise
    except OperationalError:
@@ -260,21 +333,49 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
                                  MessageCodes.LIST_DB_TABLES_FAILED) from err
 
 
-def _get_select_table_kind(schema_name, table_name, table_kind):
+def _convert_sql_search_string_to_regex(sql_str):
+    """Internal function to convert SQL string matching patterns to python regex."""
+    if sql_str:
+        # sql_str[1:-1] Removes single quotes from sql_str.
+        sql_str = sql_str[1:-1]
+
+        # If '%' is specified in 'sql_str',
+        # then the '_' character is not evaluated for an arbitrary character.
+        if '%' in sql_str:
+            # Replace % with .* if not preceded by a backslash.
+            sql_str = re.sub(r'(?<!\\)%', r'.*', sql_str, flags=re.IGNORECASE)
+            # Remove the escape character for the replacements.
+            sql_str = sql_str.replace(r'\%', '%')
+        else:
+            # Replace _ with . if not preceded by a backslash.
+            sql_str = re.sub(r'(?<!\\)_', r'.', sql_str, flags=re.IGNORECASE)
+            # Remove the escape character for the replacements.
+            sql_str = sql_str.replace(r'\_', '_')
+
+        # Add boundaries if the string doesn't start or end with '.*' i.e. SQL '%'.
+        if not sql_str.startswith('.*'):
+            sql_str = '^' + sql_str  # Anchor to the start of the string.
+        if not sql_str.endswith('.*'):
+            sql_str = sql_str + '$'  # Anchor to the end of the string.
+    return sql_str
+
+
+def _get_select_table_kind(schema_name, table_name, table_kind, datalake_name):
    """
-    Get the list of the table names from the specified schema name.
+    Get the list of the table names from the specified schema name and datalake.
 
    PARAMETERS:
        schema_name - The Name of schema in the database. The default value is the current database name.
        table_name - The pattern to be used to filtering the table names from the database.
-                     The table name argument can contain '%' as pattern matching …
-                     will return all table names starting with any …
+                     The table name argument can contain '%' as pattern matching character.For example '%abc'
+                     will return all table names starting with any characters and ending with abc.
        table_kind - The table kind to apply the filter. The valid values are 'all','table','view','volatile','temp'.
                     all - list the all the table kinds.
                     table - list only tables.
                     view - list only views.
                     volatile - list only volatile temp.
                     temp - list all teradata ml temporary objects created in the specified database.
+        datalake_name - The name of datalake to search schema in.
    RETURNS:
        Panda's DataFrame - if the operation is successful.
 
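To make the conversion rules concrete, here is a standalone re-implementation for illustration (the helper name sql_like_to_regex is mine, not the package's): the function receives the pattern still wrapped in single quotes, strips them, rewrites unescaped SQL wildcards into regex, and anchors whichever ends are not open-ended.

    import re

    def sql_like_to_regex(sql_str):
        # Same conversion as _convert_sql_search_string_to_regex, assuming the
        # input arrives quoted, e.g. "'abc%'".
        sql_str = sql_str[1:-1]                            # strip the quotes
        if '%' in sql_str:
            sql_str = re.sub(r'(?<!\\)%', r'.*', sql_str)  # unescaped '%' -> '.*'
            sql_str = sql_str.replace(r'\%', '%')          # '\%' -> literal '%'
        else:
            sql_str = re.sub(r'(?<!\\)_', r'.', sql_str)   # unescaped '_' -> '.'
            sql_str = sql_str.replace(r'\_', '_')          # '\_' -> literal '_'
        if not sql_str.startswith('.*'):
            sql_str = '^' + sql_str                        # anchor the start
        if not sql_str.endswith('.*'):
            sql_str = sql_str + '$'                        # anchor the end
        return sql_str

    assert sql_like_to_regex("'abc%'") == '^abc.*'
    assert sql_like_to_regex("'%filter%'") == '.*filter.*'
    assert sql_like_to_regex("'abc_'") == '^abc.$'

One consequence of the '%'-first branch: in a pattern such as 'a_c%', the underscore is kept as a literal character, which matches the Note added to the db_list_tables docstring above.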
@@ -289,60 +390,106 @@ def _get_select_table_kind(schema_name, table_name, table_kind):
        object_name_str = "'{0}'".format(table_name)
    object_table_kind = None
 
-    # …
-    …
-    …
-    …
-    # …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    # 'V' - stands for View
-    if (table_kind == TeradataTableKindConstants.TABLE.value):
-        object_table_kind = "'{0}','{1}','{2}'".format('O', 'Q', 'T')
-    elif (table_kind == TeradataTableKindConstants.VIEW.value):
-        object_table_kind = "'{0}'".format('V')
-    elif (table_kind == TeradataTableKindConstants.TEMP.value):
-        if table_name is None:
-            object_name_str = "'{0}'".format(TeradataTableKindConstants.ML_PATTERN.value)
-        else:
-            object_name_str = "'{0}','{1}'".format(table_name,
-                                                   TeradataTableKindConstants.ML_PATTERN.value)
+    # Tablekind:
+    # 'O' - stands for Table with no primary index and no partitioning
+    # 'Q' - stands for Queue table
+    # 'T' - stands for a Table with a primary index or primary AMP index, partitioning, or both.
+    #       Or a partitioned table with NoPI
+    # 'V' - stands for View
+    if (table_kind == TeradataTableKindConstants.TABLE.value):
+        object_table_kind = ['O', 'Q', 'T']
+    elif (table_kind == TeradataTableKindConstants.VIEW.value):
+        object_table_kind = ['V']
+    elif (table_kind == TeradataTableKindConstants.TEMP.value):
+        if table_name is None:
+            object_name_str = "'{0}'".format(TeradataTableKindConstants.ML_PATTERN.value)
        else:
-            …
-            …
-            …
-            …
-            …
-            …
-            …
+            object_name_str = "'{0}','{1}'".format(table_name,
+                                                   TeradataTableKindConstants.ML_PATTERN.value)
+    else:
+        object_table_kind = ['O', 'Q', 'T', 'V']
+
+    if datalake_name is None:
+        # Check the schema name.
+        if schema_name is None:
+            schema_name = tdmlctx._get_current_databasename()
+
+        # Create an empty dataframe with desired column name.
+        pddf = pd.DataFrame(columns=[TeradataTableKindConstants.REGULAR_TABLE_NAME.value])
+
+        # Check the table kind.
+        if table_kind != TeradataTableKindConstants.VOLATILE.value:
+            if object_table_kind is not None:
+                object_table_kind = ', '.join([f"'{value}'" for value in object_table_kind])
+            query = SQLBundle._build_select_table_kind(schema_name, object_name_str, object_table_kind)
+            pddf = pd.read_sql(query, tdmlctx.td_connection.connection)
+
+        # Check if all table kind or volatile table kind is requested.
+        # If so,add volatile tables to the pddf.
+        if table_kind == TeradataTableKindConstants.ALL.value or \
+                table_kind == TeradataTableKindConstants.VOLATILE.value:
+            # Create list of volatile tables.
            try:
-                # Add volatile tables to all dataframe.
                vtquery = SQLBundle._build_help_volatile_table()
                vtdf = pd.read_sql(vtquery, tdmlctx.td_connection.connection)
                if not vtdf.empty:
+                    # Volatile table query returns different column names.
+                    # So, rename its column names to match with normal
+                    # 'SELECT TABLENAME FROM DBC.TABLESV' query results.
                    columns_dict = {TeradataTableKindConstants.VOLATILE_TABLE_NAME.value:
                                    TeradataTableKindConstants.REGULAR_TABLE_NAME.value}
                    vtdf.rename(columns=columns_dict, inplace=True)
+                    # Volatile table names might contain leading whitespaces. Remove those.
+                    vtdf[TeradataTableKindConstants.REGULAR_TABLE_NAME.value] = vtdf[TeradataTableKindConstants.REGULAR_TABLE_NAME.value].str.strip()
+                    # Filter volatile tables using table name pattern.
+                    if object_name_str and (object_name_str := _convert_sql_search_string_to_regex(object_name_str)):
+                        name_filter = vtdf[TeradataTableKindConstants.REGULAR_TABLE_NAME.value].str.strip().str.match(
+                            object_name_str,
+                            na=False,
+                            flags=re.IGNORECASE)
+                        vtdf = vtdf[name_filter]
+                    # Concat existing list with volatile tables list.
                    frames = [pddf, vtdf[[TeradataTableKindConstants.REGULAR_TABLE_NAME.value]]]
                    pddf = pd.concat(frames)
                    pddf.reset_index(drop=True, inplace=True)
            except Exception as err:
-                # No …
+                # No volatile tables exist.
                pass
-        if (table_kind == TeradataTableKindConstants.VOLATILE.value):
-            columns_dict = {TeradataTableKindConstants.VOLATILE_TABLE_NAME.value:
-                            TeradataTableKindConstants.REGULAR_TABLE_NAME.value}
-            pddf.rename(columns=columns_dict, inplace=True)
-            return pddf[[TeradataTableKindConstants.REGULAR_TABLE_NAME.value]]
        else:
            return pddf
-
+    else:
+        # TODO: when OTF team enables VSD support for datalake tables
+        # with epic: https://teradata-pe.atlassian.net/browse/OTF-454,
+        # this can be changed to use VSD_tablesV table which is
+        # similar to DBC.TABLESV.
+        # For datalake tables' information we need to use help database and
+        # then apply filter for table kind and table substring.
+        # We can't use select from DBC.TABLESV.
+        sqlbundle = SQLBundle()
+        help_db_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_DATABASE)
+        pddf = pd.read_sql(help_db_sql.format(_get_quoted_object_name(schema_name=datalake_name,
+                                                                      object_name=schema_name)),
+                           tdmlctx.td_connection.connection)
+
+        if object_name_str:
+            object_name_str = _convert_sql_search_string_to_regex(object_name_str)
+            if object_name_str:
+                name_filter = pddf['Table/View/Macro Name'].str.strip().str.match(object_name_str, na=False,
+                                                                                  flags=re.IGNORECASE)
+                pddf = pddf[name_filter]
+
+        if object_table_kind is not None:
+            object_filter = pddf['Kind'].isin(object_table_kind)
+            pddf = pddf[object_filter]
+
+        columns_dict = {'Table/View/Macro Name':
+                        TeradataTableKindConstants.REGULAR_TABLE_NAME.value}
+        pddf.rename(columns=columns_dict, inplace=True)
+
+        # Return only filtered columns.
+        if not pddf.empty:
+            return pddf[[TeradataTableKindConstants.REGULAR_TABLE_NAME.value]]
+        else:
            return pd.DataFrame()
 
 
@@ -390,7 +537,7 @@ def _execute_transaction(queries):
            for query in queries:
                cur.execute(query)
 
-            # Try committing the …
+            # Try committing the transaction
            con.commit()
        except Exception:
            # Let's first rollback
@@ -402,6 +549,73 @@ def _execute_transaction(queries):
            cur.execute(auto_commit_on)
 
 
+def db_transaction(func):
+    """
+    DESCRIPTION:
+        Function to execute another function in a transaction.
+
+    PARAMETERS:
+        func:
+            Required Argument.
+            Specifies the function to be executed in a single transaction.
+            Types: function
+
+    RETURNS:
+        The object returned by "func".
+
+    RAISES:
+        TeradataMlException, OperationalError
+
+    EXAMPLES:
+        # Example: Declare a function to delete all the records from two tables
+        #          and execute the function in a transaction.
+        >>> @db_transaction
+        ... def insert_data(table1, table2):
+        ...     execute_sql("delete from {}".format(table1))
+        ...     execute_sql("delete from {}".format(table2))
+        ...     return True
+        >>> # Executing the above function in a transaction.
+        >>> insert_data("sales", "admissions_train")
+        True
+        >>>
+    """
+
+    def execute_transaction(*args, **kwargs):
+        auto_commit_off = "{fn teradata_nativesql}{fn teradata_autocommit_off}"
+        auto_commit_on = "{fn teradata_nativesql}{fn teradata_autocommit_on}"
+        con = None
+        cur = None
+
+        result = None
+        try:
+            con = tdmlctx.td_connection
+            if con is None:
+                raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE),
+                                          MessageCodes.CONNECTION_FAILURE)
+            con = con.connection
+            cur = con.cursor()
+            # Set auto_commit to OFF.
+            cur.execute(auto_commit_off)
+
+            # Execute function.
+            result = func(*args, **kwargs)
+
+            # Try committing the transaction.
+            con.commit()
+        except Exception:
+            # Let's first rollback.
+            con.rollback()
+            # Now, let's raise the error as is.
+            raise
+        finally:
+            # Finally, we must set auto_commit to ON.
+            cur.execute(auto_commit_on)
+
+        return result
+
+    return execute_transaction
+
+
 def _execute_stored_procedure(function_call, fetchWarnings=True, expect_none_result=False):
    """
    DESCRIPTION:
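The decorator's value shows when one statement inside the wrapped function fails: autocommit is off for the whole call, so the rollback() in the except branch undoes statements that already ran. A hedged sketch, using the same bare execute_sql as the docstring example and hypothetical table names:

    # If the second delete fails (say the table does not exist), the first
    # delete is rolled back as well, leaving "sales" untouched.
    @db_transaction
    def clear_tables():
        execute_sql("delete from sales")
        execute_sql("delete from no_such_table")  # raises -> rollback

    try:
        clear_tables()
    except Exception:
        pass  # "sales" still has all of its rows.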
@@ -499,7 +713,7 @@ def _get_function_call_as_string(sqlcFuncObj):
    return str(sqlcFuncObj.compile(**kw))
 
 
-def _get_quoted_object_name(schema_name, object_name):
+def _get_quoted_object_name(schema_name, object_name, datalake=None):
    """
    DESCRIPTION:
        This function quotes and joins schema name to the object name which can either be table or a view.
@@ -508,12 +722,18 @@ def _get_quoted_object_name(schema_name, object_name):
        schema_name
            Required Argument.
            Specifies the schema name.
-            …
+            Type: str
 
        object_name
            Required Argument.
            Specifies the object name either table or view.
-            …
+            Type: str
+
+        datalake
+            Optional Argument.
+            Specifies the datalake name.
+            Default value: None
+            Type: str
 
    RAISES:
        None
@@ -535,6 +755,8 @@ def _get_quoted_object_name(schema_name, object_name):
        schema_name = tdp.quote(tdmlctx._get_current_databasename())
 
    quoted_object_name = "{0}.{1}".format(schema_name, tdp.quote(object_name))
+    if datalake is not None:
+        quoted_object_name = "{}.{}".format(tdp.quote(datalake), quoted_object_name)
    return quoted_object_name
 
 
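With the new datalake argument the helper now emits a three-part dotted name. Conceptually (the names below are illustrative, and actual quoting is delegated to the teradatasqlalchemy preparer, which quotes identifiers as needed):

    _get_quoted_object_name("sales_db", "orders")
    # -> sales_db.orders
    _get_quoted_object_name("sales_db", "orders", "my_lake")
    # -> my_lake.sales_db.orders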
@@ -635,7 +857,6 @@ def view_log(log_type="script", num_lines=1000, query_id=None, log_dir=None):
    # Validate num_lines is a positive integer.
    _Validators._validate_positive_int(num_lines, "num_lines")
 
-
    # Query for viewing last n lines of script log.
    view_log_query = TableOperatorConstants.SCRIPT_LOG_QUERY.value \
        .format(num_lines, configure.default_varchar_size)
@@ -656,8 +877,9 @@ def view_log(log_type="script", num_lines=1000, query_id=None, log_dir=None):
            err_msg = 'Please provide directory path instead of file path.'.format(
                log_dir)
            raise TeradataMlException(err_msg, MessageCodes.INPUT_FILE_NOT_FOUND)
-        from teradataml.scriptmgmt.UserEnv import _get_auth_token, …
-        …
+        from teradataml.scriptmgmt.UserEnv import (_get_auth_token,
+                                                   _get_ues_url,
+                                                   _process_ues_response)
        ues_url = _get_ues_url(logs=True, query_id=query_id)
        response = UtilFuncs._http_request(ues_url, headers=_get_auth_token())
        resp = _process_ues_response(api_name="view_log", response=response)
@@ -721,9 +943,10 @@ def _fetch_url_and_save(url, file_path):
 def _check_if_python_packages_installed():
    """
    DESCRIPTION:
-        Function to set global …
-        …
-        …
+        Function to set the following global variables based on whether the Vantage node
+        has Python and add-on packages including pip3 installed.
+        - 'python_packages_installed' to True or False
+        - 'python_version_vantage' to the version of Python installed on Vantage.
 
    PARAMETERS:
        None.
@@ -737,14 +960,21 @@ def _check_if_python_packages_installed():
    EXAMPLES:
        _check_if_python_packages_installed()
    """
+    if tdmlctx.python_packages_installed:
+        # Skip check if Python and add-on packages are already installed and checked.
+        return
+
    # Check if Python interpreter and add-ons packages are installed or not.
    try:
        query = TableOperatorConstants.CHECK_PYTHON_INSTALLED.value.format(configure.indb_install_location)
-        UtilFuncs._execute_query(query=query)
+        opt = UtilFuncs._execute_query(query=query)
+
+        python_version = opt[0][0].split(" -- ")[1].split(" ")[1].strip()
 
        # If query execution is successful, then Python and add-on packages are
        # present.
        tdmlctx.python_packages_installed = True
+        tdmlctx.python_version_vantage = python_version
    except Exception as err:
        # Raise Exception if the error message does not contain
        # "bash: pip3: command not found".
@@ -855,6 +1085,203 @@ def db_python_package_details(names=None):
    return ret_val
 
 
+def _db_python_package_version_diff(packages=None, only_diff=True):
+    """
+    DESCRIPTION:
+        Internal function to get the pandas dataframe containing the difference in the Python
+        packages installed on Vantage and the packages mentioned in the argument "packages".
+        Note:
+            * Using this function is valid only when Python interpreter and add-on packages
+              are installed on the Vantage node.
+            * This function also checks for differences in Python packages versions given
+              part of package name as string.
+            * Returns pandas dataframe of only differences when the argument `only_diff` is set to
+              True. Otherwise, returns all the packages.
+
+    PARAMETERS:
+        packages:
+            Required Argument.
+            Specifies the name(s) of the Python package(s) for which the difference
+            in the versions is to be fetched from Vantage.
+            Note:
+                * If this argument is None, all the packages installed on Vantage are considered.
+                * If any package is present in Vantage but not in the current environment, then None
+                  is shown as the version of the package in the current environment.
+            Types: str or list of str
+
+        only_diff:
+            Optional Argument.
+            Specifies whether to return only the differences in the versions of the packages
+            installed on Vantage and the packages mentioned in the argument "packages".
+            Default Value: True
+
+    RETURNS:
+        pandas DataFrame
+
+    RAISES:
+        TeradataMlException.
+
+    EXAMPLES:
+        # Note:
+        #     These examples will work only when the Python packages are installed on Vantage.
+
+        # Example 1: Get the difference in the versions of Python packages 'dill' and 'matplotlib'
+        #            installed on Vantage.
+        >>> _db_python_package_version_diff(["dill", "matplotlib"])
+           package vantage  local
+        0     dill   0.3.6  0.3.7
+
+        # Example 2: Get the difference in the versions of Python packages 'dill' and 'matplotlib'
+        #            installed on Vantage and 'only_diff' argument set to False.
+        >>> _db_python_package_version_diff(["dill", "matplotlib"], only_diff=False)
+                     package vantage  local
+        0  matplotlib-inline   0.1.6  0.1.6
+        1               dill   0.3.6  0.3.7
+        2         matplotlib   3.6.2  3.6.2
+    """
+    # Check if Python interpreter and add-on packages are installed or not.
+    _check_if_python_packages_installed()
+
+    # Raise error if Python and add-on packages are not installed.
+    if not tdmlctx.python_packages_installed:
+        raise TeradataMlException(Messages.get_message(MessageCodes.PYTHON_NOT_INSTALLED),
+                                  MessageCodes.PYTHON_NOT_INSTALLED)
+
+    # Installed packages dictionary.
+    db_pkg_df = db_python_package_details(packages)
+    if db_pkg_df is None:
+        return None
+
+    pkgs_dict = {row.package: row.version for row in db_pkg_df.itertuples()}
+
+    from importlib.metadata import PackageNotFoundError, version
+    diff_list = []
+
+    for pkg in pkgs_dict.keys():
+        vantage_version = pkgs_dict.get(pkg)
+        try:
+            local_version = version(pkg)
+        except PackageNotFoundError:
+            # If package is not found in the current environment, then the local version is set to None.
+            local_version = None
+        except Exception as e:
+            # Any other exception is raised.
+            raise
+
+        if only_diff:
+            if vantage_version != local_version:
+                # Add to list only when the versions are different.
+                diff_list.append([pkg, vantage_version, local_version])
+        else:
+            # Add to list all the packages and versions irrespective of the differences.
+            diff_list.append([pkg, vantage_version, local_version])
+
+    return pd.DataFrame(diff_list, columns=["package", "vantage", "local"])
+
+
+@collect_queryband(queryband='PythonDiff')
+def db_python_version_diff():
+    """
+    DESCRIPTION:
+        Function to get the difference of the Python intepreter major version installed on Vantage
+        and the Python version used in the current environment.
+
+        Note:
+            * Using this function is valid only when Python interpreter and add-on packages
+              are installed on the Vantage node.
+
+    RETURNS:
+        Empty dictionary when Python major version is same on Vantage and the current environment.
+        Otherwise, returns a dictionary with the following keys:
+            - 'vantage_version': Python major version installed on Vantage.
+            - 'local_version': Python major version used in the current environment.
+
+    RAISES:
+        TeradataMlException.
+
+    EXAMPLES:
+        # Note:
+        #     These examples will work only when the Python packages are installed on Vantage.
+
+        # Example 1: Get the difference in the Python version installed on Vantage and the current environment.
+        >>> db_python_version_diff()
+        {"vantage_version": "3.7", "local_version": "3.8"}
+    """
+    # Check if Python interpretor and add-on packages are installed or not.
+    _check_if_python_packages_installed()
+
+    # Raise error if Python and add-on packages are not installed.
+    if not tdmlctx.python_packages_installed:
+        raise TeradataMlException(Messages.get_message(MessageCodes.PYTHON_NOT_INSTALLED),
+                                  MessageCodes.PYTHON_NOT_INSTALLED)
+
+    # Get major version of python installed on Vantage and the current environment.
+    python_local = tdmlctx.python_version_local.rsplit(".", 1)[0]
+    python_vantage = tdmlctx.python_version_vantage.rsplit(".", 1)[0]
+
+    if python_local != python_vantage:
+        return {"vantage_version": python_vantage, "local_version": python_local}
+
+    return {}
+
+
+@collect_queryband(queryband='PkgDiff')
+def db_python_package_version_diff(packages=None):
+    """
+    DESCRIPTION:
+        Function to get the difference of the Python packages installed on Vantage and
+        in the current environment mentioned in the argument "packages".
+
+        Notes:
+            * Using this function is valid only when Python interpreter and add-on packages
+              are installed on the Vantage node.
+            * This function also checks for differences in Python packages versions given
+              part of package name as string.
+
+    PARAMETERS:
+        packages:
+            Optional Argument.
+            Specifies the name(s) of the Python package(s) for which the difference
+            in the versions is to be fetched from Vantage.
+            Notes:
+                * If this argument is None, all the packages installed on Vantage are considered.
+                * If any package is present in Vantage but not in the current environment, then None
+                  is shown as the version of the package in the current environment.
+            Types: str or list of str
+
+    RETURNS:
+        pandas DataFrame
+
+    RAISES:
+        TeradataMlException.
+
+    EXAMPLES:
+        # Note:
+        #     These examples will work only when the Python packages are installed on Vantage.
+
+        # Example 1: Get the difference in the versions of Python package 'dill' installed on Vantage.
+        >>> db_python_package_version_diff("dill")
+          package vantage   local
+        0    dill  0.10.0  0.11.2
+
+        # Example 2: Get the difference in the versions of all Python packages installed on Vantage.
+        >>> db_python_package_version_diff()
+                  package vantage   local
+        0    scikit-learn   1.3.3  0.24.2
+        1            dill  0.10.0  0.11.2
+        ...
+        532         attrs  18.2.0  17.0.0
+
+    """
+    # Validate arguments.
+    __arg_info_matrix = []
+    __arg_info_matrix.append(["packages", packages, True, (str, list), True])
+
+    _Validators._validate_function_arguments(arg_list=__arg_info_matrix)
+
+    return _db_python_package_version_diff(packages=packages)
+
+
 def _create_table(table_name,
                  columns,
                  primary_index=None,
@@ -983,7 +1410,8 @@ def _create_table(table_name,
        else:
            pti = pti.no_primary_index()
 
-        con_form=[]
+        con_form = []
+        foreign_constraints = []
        for c_name, parameters in kwargs.items():
            _Validators._validate_function_arguments([["constraint_type", c_name, True, str,
                                                       True, SQLConstants.CONSTRAINT.value]])
@@ -992,9 +1420,21 @@ def _create_table(table_name,
                [con_form.append("{}('{}')".format("CheckConstraint", col)) for col in parameters]
            if c_name in 'foreign_key_constraint':
                parameters = parameters if isinstance(parameters[0], tuple) else [tuple(parameters)]
-                …
-                …
-                …
+                # Every element in parameter is 3 elements.
+                # 1st element and 2nd element also a list. 3rd element is name of ForeignKey.
+                for fk_columns, fk_ref_columns, fk_name in parameters:
+                    fk_ref_column_objs = []
+
+                    # fk_ref_columns is in this format - table_name.column_name .
+                    # There is no provision for schema name here.
+                    # sqlalchemy is not accepting this notation here - schema_name.table_name.column_name
+                    # So, create Column Object and bind schema name and table name to it.
+                    for fk_ref_column in fk_ref_columns:
+                        ref_column_table, ref_column = fk_ref_column.split(".")
+                        t = Table(ref_column_table, MetaData(), Column(ref_column), schema=schema_name)
+                        fk_ref_column_objs.append(getattr(t, "c")[ref_column])
+                    foreign_constraints.append(ForeignKeyConstraint(fk_columns, fk_ref_column_objs, fk_name))
+
            if c_name in ['primary_key_constraint', 'unique_key_constraint']:
                c_name = "UniqueConstraint" if c_name in 'unique_key_constraint' else 'PrimaryKeyConstraint'
                parameters = UtilFuncs._as_list(parameters)
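The loop above works around a sqlalchemy limitation the comments describe: string column references in ForeignKeyConstraint only carry table.column, with no slot for a schema. A self-contained sketch of the same trick, with hypothetical table and column names:

    from sqlalchemy import Column, ForeignKeyConstraint, Integer, MetaData, Table

    # Bind the referenced table to an explicit schema, so the generated DDL is
    # schema-qualified even though "orders.order_id" as a string could not be.
    orders = Table("orders", MetaData(), Column("order_id", Integer), schema="alice")

    items = Table(
        "order_items", MetaData(),
        Column("order_id", Integer),
        # Pass Column objects (orders.c.order_id) instead of a
        # "schema.table.column" string, which sqlalchemy rejects.
        ForeignKeyConstraint(["order_id"], [orders.c.order_id], "fk_items_orders"),
        schema="alice",
    )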
@@ -1007,7 +1447,9 @@ def _create_table(table_name,
                    " columns.items()),{} teradatasql_post_create=pti,prefixes=prefix," \
                    "schema=schema_name)".format("" if con_form is None else ",".join(con_form))
 
-        table=eval(table_str)
+        table = eval(table_str)
+        for foreign_constraint in foreign_constraints:
+            table.append_constraint(foreign_constraint)
        table.create(bind=tdmlctx.get_context())
 
    except Exception as err:
@@ -1015,6 +1457,372 @@ def _create_table(table_name,
|
|
|
1015
1457
|
raise TeradataMlException(Messages.get_message(msg_code, "create table", str(err)), msg_code)
|
|
1016
1458
|
|
|
1017
1459
|
|
|
1460
|
+
def _create_database(schema_name, size='10e6', spool_size=None,
|
|
1461
|
+
datalake=None, **kwargs):
|
|
1462
|
+
"""
|
|
1463
|
+
DESCRIPTION:
|
|
1464
|
+
Internal function to create a database with the specified name and size.
|
|
1465
|
+
|
|
1466
|
+
PARAMETERS:
|
|
1467
|
+
schema_name:
|
|
1468
|
+
Required Argument.
|
|
1469
|
+
Specifies the name of the database to create.
|
|
1470
|
+
Types: str
|
|
1471
|
+
|
|
1472
|
+
size:
|
|
1473
|
+
Optional Argument.
|
|
1474
|
+
Specifies the number of bytes to allocate to new database.
|
|
1475
|
+
Note:
|
|
1476
|
+
Exponential notation can also be used.
|
|
1477
|
+
Types: str or int
|
|
1478
|
+
|
|
1479
|
+
spool_size:
|
|
1480
|
+
Optional Argument.
|
|
1481
|
+
Specifies the number of bytes to allocate to new database
|
|
1482
|
+
for spool space.
|
|
1483
|
+
Note:
|
|
1484
|
+
Exponential notation can also be used.
|
|
1485
|
+
Types: str or int
|
|
1486
|
+
|
|
1487
|
+
datalake:
|
|
1488
|
+
Optional Argument.
|
|
1489
|
+
Specifies the name of datalake to create database in.
|
|
1490
|
+
Types: str
|
|
1491
|
+
|
|
1492
|
+
kwargs:
|
|
1493
|
+
Optional Argument.
|
|
1494
|
+
Specifies keyword arguments which are used in DBPROPERTIES
|
|
1495
|
+
clause as key-value pair while creating datalake database.
|
|
1496
|
+
|
|
1497
|
+
RETURNS:
|
|
1498
|
+
bool
|
|
1499
|
+
|
|
1500
|
+
RAISES:
|
|
1501
|
+
TeradataMlException.
|
|
1502
|
+
|
|
1503
|
+
EXAMPLES:
|
|
1504
|
+
>>> from teradataml.dbutils.dbutils import _create_database
|
|
1505
|
+
# Example 1: Create database.
|
|
1506
|
+
>>> _create_database("db_name1", "10e5")
|
|
1507
|
+
|
|
1508
|
+
# Example 2: Create database in datalake.
|
|
1509
|
+
>>> _create_database("otf_db_1", datalake="datalake_iceberg_glue")
|
|
1510
|
+
|
|
1511
|
+
# Example 3: Create database in datalake having DBPROPERTIES.
|
|
1512
|
+
>>> _create_database("otf_db", datalake="datalake_iceberg_glue",
|
|
1513
|
+
... owner='tdml_user', other_property='some_value',
|
|
1514
|
+
... other_property2=20, comment='Created by tdml_user')
|
|
1515
|
+
"""
|
|
1516
|
+
if datalake:
|
|
1517
|
+
db_properties = []
|
|
1518
|
+
for key, val in kwargs.items():
|
|
1519
|
+
db_properties.append("'{}'='{}'".format(key, val))
|
|
1520
|
+
|
|
1521
|
+
sql = "CREATE DATABASE {}.{}{};".format(datalake, schema_name,
|
|
1522
|
+
' DBPROPERTIES({})'.format(','.join(db_properties))
|
|
1523
|
+
if db_properties else '')
|
|
1524
|
+
|
|
1525
|
+
else:
|
|
1526
|
+
sql = "CREATE DATABASE {} FROM {} AS PERM = {}".format(schema_name, tdmlctx._get_database_username(), size)
|
|
1527
|
+
|
|
1528
|
+
# If user pass spool size, create it with specified space.
|
|
1529
|
+
if spool_size:
|
|
1530
|
+
sql = "{} , SPOOL = {}".format(sql, spool_size)
|
|
1531
|
+
|
|
1532
|
+
execute_sql(sql)
|
|
1533
|
+
return True
|
|
1534
|
+
|
|
1535
|
+
|
|
1536
|
+
def _update_data(update_columns_values, table_name, schema_name, datalake_name=None, update_conditions=None):
|
|
1537
|
+
"""
|
|
1538
|
+
DESCRIPTION:
|
|
1539
|
+
Internal function to update the data in a table.
|
|
1540
|
+
|
|
1541
|
+
PARAMETERS:
|
|
1542
|
+
update_columns_values:
|
|
1543
|
+
Required Argument.
|
|
1544
|
+
Specifies the columns and it's values to update.
|
|
1545
|
+
Types: dict
|
|
1546
|
+
|
|
1547
|
+
table_name:
|
|
1548
|
+
Required Argument.
|
|
1549
|
+
Specifies the name of the table to update.
|
|
1550
|
+
Types: str
|
|
1551
|
+
|
|
1552
|
+
schema_name:
|
|
1553
|
+
Required Argument.
|
|
1554
|
+
Specifies the name of the database to update the data in the
|
|
1555
|
+
table "table_name".
|
|
1556
|
+
Types: str
|
|
1557
|
+
|
|
1558
|
+
datalake_name:
|
|
1559
|
+
Optional Argument.
|
|
1560
|
+
Specifies the name of the datalake to look for "schema_name".
|
|
1561
|
+
Types: str
|
|
1562
|
+
|
|
1563
|
+
update_conditions:
|
|
1564
|
+
Optional Argument.
|
|
1565
|
+
Specifies the key columns and it's values which is used as condition
|
|
1566
|
+
for updating the records.
|
|
1567
|
+
Types: dict
|
|
1568
|
+
|
|
1569
|
+
RETURNS:
|
|
1570
|
+
bool
|
|
1571
|
+
|
|
1572
|
+
RAISES:
|
|
1573
|
+
TeradataMlException.
|
|
1574
|
+
|
|
1575
|
+
EXAMPLES:
|
|
1576
|
+
>>> from teradataml.dbutils.dbutils import _update_data
|
|
1577
|
+
>>> _update_data("db_name1", "tbl", update_conditions={"column1": "value1"})
|
|
1578
|
+
"""
|
|
1579
|
+
# Prepare the update clause.
|
|
1580
|
+
update_clause = ", ".join(("{} = ?".format(col) for col in update_columns_values))
|
|
1581
|
+
update_values = tuple((_value for _value in update_columns_values.values()))
|
|
1582
|
+
|
|
1583
|
+
# If key_columns_values is passed, then prepare the SQL with where clause.
|
|
1584
|
+
# Else, simply update every thing.
|
|
1585
|
+
qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
|
|
1586
|
+
|
|
1587
|
+
get_str_ = lambda val: "'{}'".format(val) if isinstance(val, str) else val
|
|
1588
|
+
if update_conditions:
|
|
1589
|
+
|
|
1590
|
+
# Prepare where clause.
|
|
1591
|
+
where_ = []
|
|
1592
|
+
for column, col_value in update_conditions.items():
|
|
1593
|
+
if isinstance(col_value, list):
|
|
1594
|
+
col_value = ", ".join(get_str_(val) for val in col_value)
|
|
1595
|
+
col_value = "({})".format(col_value)
|
|
1596
|
+
where_.append("{} IN {}".format(column, col_value))
|
|
1597
|
+
else:
|
|
1598
|
+
where_.append("{} = {}".format(column, col_value))
|
|
1599
|
+
|
|
1600
|
+
where_clause = " AND ".join(where_)
|
|
1601
|
+
|
|
1602
|
+
sql = f"""UPDATE {qualified_table_name} SET {update_clause}
|
|
1603
|
+
WHERE {where_clause}
|
|
1604
|
+
"""
|
|
1605
|
+
|
|
1606
|
+
execute_sql(sql, (*update_values,))
|
|
1607
|
+
|
|
1608
|
+
else:
|
|
1609
|
+
sql = f"""UPDATE {qualified_table_name} SET {update_clause}"""
|
|
1610
|
+
|
|
1611
|
+
execute_sql(sql, update_values)
|
|
1612
|
+
return True
|
|
1613
|
+
|
|
1614
|
+
|
|
1615
|
+
def _insert_data(table_name, values, columns=None, schema_name=None, datalake_name=None):
|
|
1616
|
+
"""
|
|
1617
|
+
DESCRIPTION:
|
|
1618
|
+
Internal function to insert the data in a table.
|
|
1619
|
+
|
|
1620
|
+
PARAMETERS:
|
|
1621
|
+
table_name:
|
|
1622
|
+
Required Argument.
|
|
1623
|
+
Specifies the name of the table to insert.
|
|
1624
|
+
Types: str
|
|
1625
|
+
|
|
1626
|
+
values:
|
|
1627
|
+
Required Argument.
|
|
1628
|
+
Specifies the values to insert.
|
|
1629
|
+
Types: tuple or list of tuple
|
|
1630
|
+
|
|
1631
|
+
columns:
|
|
1632
|
+
Optional Argument.
|
|
1633
|
+
Specifies the name of columns to be involved in insert.
|
|
1634
|
+
Types: list
|
|
1635
|
+
|
|
1636
|
+
schema_name:
|
|
1637
|
+
Optional Argument.
|
|
1638
|
+
Specifies the name of the database to insert the data in the
|
|
1639
|
+
table "table_name".
|
|
1640
|
+
Types: str
|
|
1641
|
+
|
|
1642
|
+
datalake_name:
|
|
1643
|
+
Optional Argument.
|
|
1644
|
+
Specifies the name of the datalake to look for "schema_name".
|
|
1645
|
+
Types: str
|
|
1646
|
+
|
|
1647
|
+
RETURNS:
|
|
1648
|
+
bool
|
|
1649
|
+
|
|
1650
|
+
RAISES:
|
|
1651
|
+
TeradataMlException.
|
|
1652
|
+
|
|
1653
|
+
EXAMPLES:
|
|
1654
|
+
>>> from teradataml.dbutils.dbutils import _insert_data
|
|
1655
|
+
>>> _insert_data("tbl", (1, 2, 3))
|
|
1656
|
+
"""
|
|
1657
|
+
+    # Prepare the qualified table name.
+    qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
+
+    values = UtilFuncs._as_list(values)
+
+    # Prepare columns clause.
+    if columns:
+        # Prepare question marks.
+        _q_marks = ["?"] * len(columns)
+        columns = "({})".format(", ".join(columns))
+    else:
+        columns = ""
+        _q_marks = ["?"] * (len(values[0]))
+
+    sql = "insert into {} {} values ({});".format(qualified_table_name, columns, ", ".join(_q_marks))
+    execute_sql(sql, values)
+
+    return True
+
+
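The insert path builds a positional-placeholder statement and hands the row tuples to execute_sql. A minimal sketch of the statement shape (illustrative names; with a plain DBAPI cursor the rows would typically go through executemany):

    # Illustrative only: shape of the INSERT built by _insert_data above.
    columns = ["id", "name"]
    rows = [(1, "a"), (2, "b")]
    sql = "insert into {} {} values ({});".format(
        "db.tbl", "({})".format(", ".join(columns)), ", ".join(["?"] * len(columns)))
    print(sql)  # insert into db.tbl (id, name) values (?, ?);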
+def _upsert_data(update_columns_values,
+                 insert_columns_values,
+                 upsert_conditions,
+                 table_name,
+                 schema_name,
+                 datalake_name=None):
+    """
+    DESCRIPTION:
+        Internal function to either insert or update the data in a table.
+
+    PARAMETERS:
+        update_columns_values:
+            Required Argument.
+            Specifies the columns and their values to update.
+            Types: dict
+
+        insert_columns_values:
+            Required Argument.
+            Specifies the columns and their values to insert.
+            Types: dict
+
+        upsert_conditions:
+            Required Argument.
+            Specifies the key columns and their values, used as the condition
+            for updating the records.
+            Types: dict
+
+        table_name:
+            Required Argument.
+            Specifies the name of the table to upsert the data into.
+            Types: str
+
+        schema_name:
+            Required Argument.
+            Specifies the name of the database to update the data in the
+            table "table_name".
+            Types: str
+
+        datalake_name:
+            Optional Argument.
+            Specifies the name of the datalake to look for "schema_name".
+            Note:
+                "schema_name" must be provided while using this argument.
+            Types: str
+
+    RETURNS:
+        bool
+
+    RAISES:
+        TeradataMlException.
+
+    EXAMPLES:
+        >>> from teradataml.dbutils.dbutils import _upsert_data
+        >>> _upsert_data(update_columns_values={"column1": "value1"},
+        ...              insert_columns_values={"column1": "value2"},
+        ...              upsert_conditions={"key1": "val1"},
+        ...              table_name="tbl",
+        ...              schema_name="db_name1")
+    """
+    # If user passes datalake name, then append the same to schema name.
+    qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
+
+    # Prepare the update clause.
+    update_clause = ", ".join(("{} = ?".format(col) for col in update_columns_values))
+    update_values = tuple((_value for _value in update_columns_values.values()))
+
+    # Prepare the where clause and its values.
+    where_clause = " AND ".join(("{} = ?".format(col) for col in upsert_conditions))
+    where_values = tuple((_value for _value in upsert_conditions.values()))
+
+    # Prepare the insert clause and its values.
+    insert_values_clause = ", ".join(("?" for _ in range(len(insert_columns_values))))
+    insert_clause = "({}) values ({})".format(", ".join(insert_columns_values), insert_values_clause)
+    insert_values = tuple((_value for _value in insert_columns_values.values()))
+
+    sql = f"""UPDATE {qualified_table_name} SET {update_clause}
+              WHERE {where_clause}
+              ELSE INSERT {qualified_table_name} {insert_clause}
+           """
+    execute_sql(sql, (*update_values, *where_values, *insert_values))
+
+    return True
+
+
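Teradata's atomic upsert is a single UPDATE ... ELSE INSERT statement, so the bind parameters must follow placeholder order: SET values, then WHERE values, then INSERT values. A minimal sketch of the statement and parameter tuple (illustrative names; no connection needed):

    # Illustrative only: shape of the atomic upsert built by _upsert_data above.
    update_cols = {"status": "done"}
    keys = {"job_id": 7}
    insert_cols = {"job_id": 7, "status": "done"}
    sql = ("UPDATE db.tbl SET {} WHERE {} "
           "ELSE INSERT db.tbl ({}) values ({})").format(
        ", ".join("{} = ?".format(c) for c in update_cols),
        " AND ".join("{} = ?".format(c) for c in keys),
        ", ".join(insert_cols),
        ", ".join("?" for _ in insert_cols))
    params = (*update_cols.values(), *keys.values(), *insert_cols.values())
    print(sql)     # UPDATE db.tbl SET status = ? WHERE job_id = ? ELSE INSERT db.tbl (job_id, status) values (?, ?)
    print(params)  # ('done', 7, 7, 'done')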
+def _delete_data(table_name, schema_name=None, datalake_name=None, delete_conditions=None):
+    """
+    DESCRIPTION:
+        Internal function to delete the data in a table.
+
+    PARAMETERS:
+        table_name:
+            Required Argument.
+            Specifies the name of the table to delete the data from.
+            Types: str
+
+        schema_name:
+            Optional Argument.
+            Specifies the name of the database to delete the data in the
+            table "table_name".
+            Types: str
+
+        datalake_name:
+            Optional Argument.
+            Specifies the name of the datalake to look for "schema_name".
+            Types: str
+
+        delete_conditions:
+            Optional Argument.
+            Specifies the ColumnExpression or dictionary containing key-value
+            pairs to use for removing the data.
+            Types: ColumnExpression, dict
+
+    RETURNS:
+        int, the number of records that were deleted.
+
+    RAISES:
+        TeradataMlException.
+
+    EXAMPLES:
+        >>> from teradataml.dbutils.dbutils import _delete_data
+        >>> _delete_data("tbl", "db_name1", delete_conditions={"column1": "value1"})
+    """
+    qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
+    sqlbundle = SQLBundle()
+
+    sql = sqlbundle._get_sql_query(SQLConstants.SQL_DELETE_ALL_ROWS).format(qualified_table_name)
+
+    # If conditions exist, then prepare the where clause.
+    if delete_conditions:
+        from teradataml.dataframe.sql import _SQLColumnExpression
+        if isinstance(delete_conditions, _SQLColumnExpression):
+            where_clause = delete_conditions.compile()
+        elif isinstance(delete_conditions, dict):
+            get_str_ = lambda val: "'{}'".format(val) if isinstance(val, str) else val
+            where_ = []
+            for column, col_value in delete_conditions.items():
+                if isinstance(col_value, list):
+                    col_value = ", ".join(str(get_str_(val)) for val in col_value)
+                    col_value = "({})".format(col_value)
+                    where_.append("{} IN {}".format(column, col_value))
+                else:
+                    where_.append("{} = {}".format(column, get_str_(col_value)))
+            where_clause = " AND ".join(where_)
+
+        sql = sqlbundle._get_sql_query(SQLConstants.SQL_DELETE_SPECIFIC_ROW).format(qualified_table_name, where_clause)
+
+    res = execute_sql(sql)
+    return res.rowcount
+
+
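Both condition styles accepted by _delete_data end up as a WHERE clause: a dict becomes equality and IN predicates, while a ColumnExpression is compiled to SQL directly. A hedged usage sketch, assuming an established teradataml connection and an existing table:

    # Illustrative only; assumes an established teradataml connection.
    from teradataml.dbutils.dbutils import _delete_data

    # Dict conditions compile to: WHERE status = 'stale' AND job_id IN (1, 2)
    deleted = _delete_data("tbl", "db_name1",
                           delete_conditions={"status": "stale", "job_id": [1, 2]})
    print(deleted)  # number of rows removed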
 @collect_queryband(queryband='LstKwrds')
 def list_td_reserved_keywords(key=None, raise_error=False):
     """
@@ -1079,6 +1887,7 @@ def list_td_reserved_keywords(key=None, raise_error=False):
     """
 
     from teradataml.dataframe.dataframe import DataFrame, in_schema
+
     # Get the reserved keywords from the table
     reserved_keys = DataFrame(in_schema("SYSLIB", "SQLRestrictedWords"))
 
@@ -1096,10 +1905,10 @@ def list_td_reserved_keywords(key=None, raise_error=False):
     # Check if key contains Teradata reserved keyword or not.
     res_key = (k.upper() for k in key if k.upper() in reservered_words)
     res_key = list(res_key)
-    if len(res_key)>0:
+    if len(res_key) > 0:
         if raise_error:
             raise TeradataMlException(Messages.get_message(MessageCodes.RESERVED_KEYWORD, res_key),
-                                      MessageCodes.RESERVED_KEYWORD)
+                                      MessageCodes.RESERVED_KEYWORD)
         return True
     return False
 
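Per the hunks above, the check is a membership test against SYSLIB.SQLRestrictedWords. A hedged usage sketch, assuming a connected Vantage session (behavior for key=None is defined outside these hunks):

    # Illustrative only; assumes create_context(...) has been called.
    from teradataml.dbutils.dbutils import list_td_reserved_keywords

    list_td_reserved_keywords(["account"])                  # True: ACCOUNT is reserved
    list_td_reserved_keywords(["mycol"], raise_error=True)  # False, and no error raised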
@@ -1189,6 +1998,7 @@ def _execute_query_and_generate_pandas_df(query, index=None, **kwargs):
 
     return pandas_df
 
+
 class _TDSessionParams:
     """
     A successful connection through teradataml establishes a session with Vantage.
@@ -1196,6 +2006,7 @@ class _TDSessionParams:
     for parameter 'Session Time Zone'.
     This is an internal utility to store all session related parameters.
     """
+
     def __init__(self, data):
         """
         Constructor to store columns and rows of session params.
@@ -1222,6 +2033,7 @@ class _TDSessionParams:
             return self.__session_params[parameter]
         raise AttributeError("'TDSessionParams' object has no attribute '{}'".format(parameter))
 
+
 def set_session_param(name, value):
     """
     DESCRIPTION:
@@ -1397,15 +2209,16 @@ def set_session_param(name, value):
             [param[0] for param in result.description],
             [value for value in next(result)]
         ))
-        _InternalBuffer.add(session_params
+        _InternalBuffer.add(session_params=_TDSessionParams(data))
         # Store function name of 'DEBUG_FUNCTION' used.
-        _InternalBuffer.add(function_name
+        _InternalBuffer.add(function_name=value[0] if name.upper() == 'DEBUG_FUNCTION' else '')
 
     # Set the session parameter.
     execute_sql(getattr(SessionParamsSQL, name.upper()).format(*value))
 
     return True
 
+
 def unset_session_param(name):
     """
     DESCRIPTION:
@@ -1449,7 +2262,7 @@ def unset_session_param(name):
     # unset_values stores params which are not available in _InternalBuffer; to unset, create a dictionary
     # with the param as key and its unset value as value.
     unset_values = {"CHARACTER_SET_UNICODE": "OFF", "DEBUG_FUNCTION": [_InternalBuffer.get('function_name'), "OFF"],
-                    "ISOLATED_LOADING":"NO", "FUNCTION_TRACE":"SET SESSION FUNCTION TRACE OFF",
+                    "ISOLATED_LOADING": "NO", "FUNCTION_TRACE": "SET SESSION FUNCTION TRACE OFF",
                     "JSON_IGNORE_ERRORS": "OFF", "QUERY_BAND": ["", "SESSION"]}
 
     # If 'name' is in unset_values, unset the params.
@@ -1463,11 +2276,132 @@ def unset_session_param(name):
         return True
 
     previous_value = "{}".format(session_params[getattr(SessionParamsPythonNames, name.upper())]) \
-        if name.upper() != 'TIMEZONE' else "'{}'".format(
-        session_params[getattr(SessionParamsPythonNames, name.upper())])
+        if name.upper() != 'TIMEZONE' else "'{}'".format(
+        session_params[getattr(SessionParamsPythonNames, name.upper())])
+
     if name.upper() == "ACCOUNT":
         previous_value = [previous_value, 'SESSION']
     set_session_param(name, previous_value)
 
     return True
 
+
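The buffered session_params and function_name entries from set_session_param are what unset_session_param reads back when restoring a value. A hedged sketch of the public pair, assuming an established connection and that 'timezone' is among the supported parameter names:

    # Illustrative only; assumes an established teradataml connection.
    from teradataml import set_session_param, unset_session_param

    set_session_param("timezone", "GMT")  # issues SET SESSION TIME ZONE 'GMT'
    unset_session_param("timezone")       # restores the previously buffered zone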
+class _Authorize:
+    """ Parent class to either grant or revoke access on table(s). """
+    _property = None
+
+    def __init__(self, objects):
+        """
+        DESCRIPTION:
+            Constructor for creating an _Authorize object.
+
+        PARAMETERS:
+            objects:
+                Required Argument.
+                Specifies the name(s) of the database objects to be authorized.
+                Types: str OR list of str
+
+        RETURNS:
+            Object of _Authorize.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> auth = _Authorize('vfs_v1')
+        """
+        # Store the objects here. Then use them wherever required.
+        self._objects = UtilFuncs._as_list(objects)
+        self._access_method = self.__class__.__name__.upper()
+
+    def read(self, user):
+        """
+        DESCRIPTION:
+            Authorize the read access.
+            Note:
+                One must have admin access to give read access to another "user".
+
+        PARAMETERS:
+            user:
+                Required Argument.
+                Specifies the name of the user to have read only access.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> Grant('repo').read('BoB')
+        """
+        for obj in self._objects:
+            sql = "{} SELECT ON {} {} {}".format(self._access_method, obj, self._property, user)
+            execute_sql(sql)
+
+        return True
+
+    def write(self, user):
+        """
+        DESCRIPTION:
+            Authorize the write access.
+            Note:
+                One must have admin access to give write access to another "user".
+
+        PARAMETERS:
+            user:
+                Required Argument.
+                Specifies the name of the user to have write only access.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> Grant('repo').write('BoB')
+        """
+        for access_type in ["INSERT", "UPDATE", "DELETE"]:
+            for obj in self._objects:
+                sql = "{} {} ON {} {} {}".format(self._access_method, access_type, obj, self._property, user)
+                execute_sql(sql)
+
+        return True
+
+    def read_write(self, user):
+        """
+        DESCRIPTION:
+            Authorize the read and write access.
+            Note:
+                One must have admin access to give read and write access to another "user".
+
+        PARAMETERS:
+            user:
+                Required Argument.
+                Specifies the name of the user to have read and write access.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> Grant('repo').read_write('BoB')
+        """
+        self.read(user)
+        return self.write(user)
+
+
+class Grant(_Authorize):
+    """ Class to grant access to tables."""
+    _property = "TO"
+
+
+class Revoke(_Authorize):
+    """ Class to revoke access from tables."""
+    _property = "FROM"