teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (84) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +119 -0
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +18 -6
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/sqle/__init__.py +4 -1
  7. teradataml/analytics/valib.py +18 -4
  8. teradataml/automl/__init__.py +51 -6
  9. teradataml/automl/data_preparation.py +56 -33
  10. teradataml/automl/data_transformation.py +58 -33
  11. teradataml/automl/feature_engineering.py +12 -5
  12. teradataml/automl/model_training.py +34 -13
  13. teradataml/common/__init__.py +1 -2
  14. teradataml/common/constants.py +64 -40
  15. teradataml/common/messagecodes.py +13 -3
  16. teradataml/common/messages.py +4 -1
  17. teradataml/common/sqlbundle.py +40 -10
  18. teradataml/common/utils.py +113 -39
  19. teradataml/common/warnings.py +11 -0
  20. teradataml/context/context.py +141 -17
  21. teradataml/data/amazon_reviews_25.csv +26 -0
  22. teradataml/data/byom_example.json +11 -0
  23. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  24. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  25. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  26. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  27. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  28. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  29. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  30. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  31. teradataml/data/hnsw_alter_data.csv +5 -0
  32. teradataml/data/hnsw_data.csv +10 -0
  33. teradataml/data/jsons/byom/h2opredict.json +1 -1
  34. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  35. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  36. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  37. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  38. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  39. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  40. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +1 -1
  41. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +5 -5
  42. teradataml/data/teradataml_example.json +8 -0
  43. teradataml/data/vectordistance_example.json +1 -1
  44. teradataml/dataframe/copy_to.py +8 -3
  45. teradataml/dataframe/data_transfer.py +11 -1
  46. teradataml/dataframe/dataframe.py +517 -121
  47. teradataml/dataframe/dataframe_utils.py +152 -20
  48. teradataml/dataframe/functions.py +26 -11
  49. teradataml/dataframe/setop.py +11 -6
  50. teradataml/dataframe/sql.py +2 -2
  51. teradataml/dbutils/dbutils.py +525 -129
  52. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  53. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +317 -1011
  54. teradataml/opensource/_class.py +141 -17
  55. teradataml/opensource/{constants.py → _constants.py} +7 -3
  56. teradataml/opensource/_lightgbm.py +52 -53
  57. teradataml/opensource/_sklearn.py +1008 -0
  58. teradataml/opensource/_wrapper_utils.py +5 -5
  59. teradataml/options/__init__.py +47 -15
  60. teradataml/options/configure.py +103 -25
  61. teradataml/options/display.py +13 -2
  62. teradataml/plot/axis.py +47 -8
  63. teradataml/plot/figure.py +33 -0
  64. teradataml/plot/plot.py +63 -13
  65. teradataml/scriptmgmt/UserEnv.py +2 -2
  66. teradataml/scriptmgmt/lls_utils.py +63 -26
  67. teradataml/store/__init__.py +1 -2
  68. teradataml/store/feature_store/feature_store.py +102 -7
  69. teradataml/table_operators/Apply.py +32 -18
  70. teradataml/table_operators/Script.py +3 -1
  71. teradataml/table_operators/TableOperator.py +3 -1
  72. teradataml/utils/dtypes.py +47 -0
  73. teradataml/utils/internal_buffer.py +18 -0
  74. teradataml/utils/validators.py +68 -9
  75. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +123 -2
  76. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +79 -75
  77. teradataml/data/SQL_Fundamentals.pdf +0 -0
  78. teradataml/libaed_0_1.dylib +0 -0
  79. teradataml/libaed_0_1.so +0 -0
  80. teradataml/opensource/sklearn/__init__.py +0 -0
  81. teradataml/store/vector_store/__init__.py +0 -1586
  82. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  83. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  84. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -9,35 +9,45 @@ teradataml db utilities
9
9
  ----------
10
10
  The teradataml database utility functions provide an interface to common Teradata Vantage tasks such as drop_table, drop_view, create_table, etc.
11
11
  """
12
- import concurrent.futures, json, os, tempfile, shutil
12
+ import concurrent.futures
13
+ import json
14
+ import os
15
+ import re
16
+ import shutil
17
+ import tempfile
13
18
  from datetime import datetime
19
+
14
20
  import pandas as pd
21
+ from sqlalchemy import (CheckConstraint, Column, ForeignKeyConstraint,
22
+ MetaData, PrimaryKeyConstraint, Table,
23
+ UniqueConstraint)
15
24
  from sqlalchemy.sql.functions import Function
16
- from teradataml.context import context as tdmlctx
17
- from teradataml.common.utils import UtilFuncs
18
- from teradataml.common.messages import Messages
19
- from teradataml.common.messagecodes import MessageCodes
25
+ from teradatasql import OperationalError
26
+ from teradatasqlalchemy.dialect import TDCreateTablePost as post
27
+ from teradatasqlalchemy.dialect import dialect as td_dialect
28
+ from teradatasqlalchemy.dialect import preparer
29
+
30
+ import teradataml.dataframe as tdmldf
31
+ from teradataml.common.constants import (SessionParamsPythonNames,
32
+ SessionParamsSQL, SQLConstants,
33
+ TableOperatorConstants,
34
+ TeradataTableKindConstants)
20
35
  from teradataml.common.exceptions import TeradataMlException
21
- from teradataml.common.constants import TeradataTableKindConstants
36
+ from teradataml.common.messagecodes import MessageCodes
37
+ from teradataml.common.messages import Messages
22
38
  from teradataml.common.sqlbundle import SQLBundle
23
- from teradataml.common.constants import SQLConstants, SessionParamsSQL, SessionParamsPythonNames
24
- from teradataml.common.constants import TableOperatorConstants
25
- import teradataml.dataframe as tdmldf
39
+ from teradataml.common.utils import UtilFuncs
40
+ from teradataml.context import context as tdmlctx
26
41
  from teradataml.options.configure import configure
27
- from teradataml.utils.utils import execute_sql
28
- from teradataml.utils.validators import _Validators
29
- from teradataml.utils.internal_buffer import _InternalBuffer
30
- from teradatasql import OperationalError
31
- from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
32
- from teradatasqlalchemy.dialect import TDCreateTablePost as post
33
42
  from teradataml.telemetry_utils.queryband import collect_queryband
34
- from sqlalchemy import Table, Column, MetaData, CheckConstraint, \
35
- PrimaryKeyConstraint, ForeignKeyConstraint, UniqueConstraint
36
43
  from teradataml.utils.internal_buffer import _InternalBuffer
44
+ from teradataml.utils.utils import execute_sql
45
+ from teradataml.utils.validators import _Validators
37
46
 
38
47
 
39
48
  @collect_queryband(queryband='DrpTbl')
40
- def db_drop_table(table_name, schema_name=None, suppress_error=False):
49
+ def db_drop_table(table_name, schema_name=None, suppress_error=False,
50
+ datalake_name=None, purge=None):
41
51
  """
42
52
  DESCRIPTION:
43
53
  Drops the table from the given schema.
@@ -61,6 +71,22 @@ def db_drop_table(table_name, schema_name=None, suppress_error=False):
61
71
  Default Value: False
62
72
  Types: str
63
73
 
74
+ datalake_name:
75
+ Optional Argument.
76
+ Specifies the name of the datalake to drop the table from.
77
+ Note:
78
+ "schema_name" must be provided while using this argument.
79
+ Default Value: None
80
+ Types: str
81
+
82
+ purge:
83
+ Optional Argument.
84
+ Specifies whether to use purge clause or not while dropping datalake table.
85
+ It is only applicable when "datalake_name" argument is used. When "datalake_name" is specified,
86
+ but "purge" is not specified, data is purged by default.
87
+ Default Value: None
88
+ Types: bool
89
+
64
90
  RETURNS:
65
91
  True - if the operation is successful.
66
92
 
@@ -70,25 +96,44 @@ def db_drop_table(table_name, schema_name=None, suppress_error=False):
70
96
  EXAMPLES:
71
97
  >>> load_example_data("dataframe", "admissions_train")
72
98
 
73
- # Drop table in current database
99
+ # Example 1: Drop table in current database.
74
100
  >>> db_drop_table(table_name = 'admissions_train')
75
101
 
76
- # Drop table from the given schema
102
+ # Example 2: Drop table from the given schema.
77
103
  >>> db_drop_table(table_name = 'admissions_train', schema_name = 'alice')
104
+
105
+ # Example 3: Drop a table from datalake and purge the data.
106
+ >>> db_drop_table(table_name = 'datalake_table', schema_name = 'datalake_db',
107
+ ... datalake_name='datalake', purge=True)
108
+
78
109
  """
79
110
  # Argument validations
80
111
  awu_matrix = []
81
112
  awu_matrix.append(["schema_name", schema_name, True, (str), True])
82
113
  awu_matrix.append(["table_name", table_name, False, (str), True])
83
-
114
+ awu_matrix.append(["datalake_name", datalake_name, True, (str), True])
115
+ awu_matrix.append(["purge", purge, True, (bool, type(None)), True])
84
116
  # Validate argument types
85
117
  _Validators._validate_function_arguments(awu_matrix)
86
118
 
119
+ # Process datalake related arguments.
120
+ purge_clause = None
121
+ if datalake_name is not None:
122
+ if schema_name is None:
123
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "schema_name",
124
+ "datalake_name")
125
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
126
+
127
+ if purge is False:
128
+ purge_clause = "NO PURGE"
129
+ else:
130
+ purge_clause = "PURGE ALL"
131
+
87
132
  # Joining view and schema names in the format "schema_name"."view_name"
88
- table_name = _get_quoted_object_name(schema_name, table_name)
133
+ table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
89
134
 
90
135
  try:
91
- return UtilFuncs._drop_table(table_name)
136
+ return UtilFuncs._drop_table(table_name, purge_clause=purge_clause)
92
137
  except (TeradataMlException, OperationalError):
93
138
  if suppress_error:
94
139
  pass
@@ -162,7 +207,7 @@ def db_drop_view(view_name, schema_name=None):
162
207
 
163
208
 
164
209
  @collect_queryband(queryband='LstTbls')
165
- def db_list_tables(schema_name=None, object_name=None, object_type='all'):
210
+ def db_list_tables(schema_name=None, object_name=None, object_type='all', datalake_name=None):
166
211
  """
167
212
  DESCRIPTION:
168
213
  Lists the Vantage objects(table/view) names for the specified schema name.
@@ -179,10 +224,10 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
179
224
  Optional Argument.
180
225
  Specifies a table/view name or pattern to be used for filtering them from the database.
181
226
  Pattern may contain '%' or '_' as pattern matching characters.
182
- A '%' represents any string of zero or more arbitrary characters. Any string of characters is acceptable as
183
- a replacement for the percent.
184
- A '_' represents exactly one arbitrary character. Any single character is acceptable in the position in
185
- which the underscore character appears.
227
+ - '%' represents any string of zero or more arbitrary characters. Any string of characters is acceptable as
228
+ a replacement for the percent.
229
+ - '_' represents exactly one arbitrary character. Any single character is acceptable in the position in
230
+ which the underscore character appears.
186
231
  Note:
187
232
  * If '%' is specified in 'object_name', then the '_' character is not evaluated for an arbitrary character.
188
233
  Default Value: None
@@ -203,6 +248,14 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
203
248
  Default Value: 'all'
204
249
  Types: str
205
250
 
251
+ datalake_name:
252
+ Optional Argument.
253
+ Specifies the name of datalake to list tables from.
254
+ Note:
255
+ "schema_name" must be provided while using this argument.
256
+ Default Value: None
257
+ Types: str
258
+
206
259
  RETURNS:
207
260
  Pandas DataFrame
208
261
 
@@ -211,38 +264,40 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
211
264
  OperationalError - If any errors are raised from Vantage.
212
265
 
213
266
  EXAMPLES:
214
- # Example 1 - List all object types in the default schema
267
+ # Example 1: List all object types in the default schema
215
268
  >>> load_example_data("dataframe", "admissions_train")
216
269
  >>> db_list_tables()
217
270
 
218
- # Example 2 - List all the views in the default schema
271
+ # Example 2: List all the views in the default schema
219
272
  >>> execute_sql("create view temporary_view as (select 1 as dummy_col1, 2 as dummy_col2);")
220
273
  >>> db_list_tables(None , None, 'view')
221
274
 
222
- # Example 3 - List all the object types in the default schema whose names begin with 'abc' followed by any number
275
+ # Example 3: List all the object types in the default schema whose names begin with 'abc' followed by any number
223
276
  # of characters in the end.
224
277
  >>> execute_sql("create view abcd123 as (select 1 as dummy_col1, 2 as dummy_col2);")
225
278
  >>> db_list_tables(None, 'abc%', None)
226
279
 
227
- # Example 4 - List all the tables in the default schema whose names begin with 'adm' followed by any number of
280
+ # Example 4: List all the tables in the default schema whose names begin with 'adm' followed by any number of
228
281
  # characters and ends with 'train'.
229
282
  >>> load_example_data("dataframe", "admissions_train")
230
283
  >>> db_list_tables(None, 'adm%train', 'table')
231
284
 
232
- # Example 5 - List all the views in the default schema whose names begin with any character but ends with 'abc'
285
+ # Example 5: List all the views in the default schema whose names begin with any character but ends with 'abc'
233
286
  >>> execute_sql("create view view_abc as (select 1 as dummy_col1, 2 as dummy_col2);")
234
287
  >>> db_list_tables(None, '%abc', 'view')
235
288
 
236
- # Example 6 - List all the volatile tables in the default schema whose names begin with 'abc' and ends with any
289
+ # Example 6: List all the volatile tables in the default schema whose names begin with 'abc' and ends with any
237
290
  # arbitrary character and has a length of 4
238
291
  >>> execute_sql("CREATE volatile TABLE abcd(col0 int, col1 float) NO PRIMARY INDEX;")
239
292
  >>> db_list_tables(None, 'abc_', 'volatile')
240
293
 
241
- # Example 7 - List all the temporary objects created by teradataml in the default schema whose names begins and
294
+ # Example 7: List all the temporary objects created by teradataml in the default schema whose names begins and
242
295
  # ends with any number of arbitrary characters but contains 'filter' in between.
243
296
  >>> db_list_tables(None, '%filter%', 'temp')
244
- """
245
297
 
298
+ # Example 8: List all the tables in datalake's database.
299
+ >>> db_list_tables(schema_name='datalake_db_name', datalake_name='datalake_name')
300
+ """
246
301
  if tdmlctx.get_connection() is None:
247
302
  raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_CONTEXT_CONNECTION),
248
303
  MessageCodes.INVALID_CONTEXT_CONNECTION)
@@ -257,12 +312,18 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
257
312
  TeradataTableKindConstants.VOLATILE.value,
258
313
  TeradataTableKindConstants.TEMP.value]
259
314
  awu_matrix.append(["object_type", object_type, True, (str), True, permitted_object_types])
260
-
315
+ awu_matrix.append(["datalake_name", datalake_name, True, (str), True])
261
316
  # Validate argument types
262
317
  _Validators._validate_function_arguments(awu_matrix)
263
318
 
319
+ # 'schema_name' must be provided while using 'datalake_name'.
320
+ _Validators._validate_dependent_argument(dependent_arg='datalake_name',
321
+ dependent_arg_value=datalake_name,
322
+ independent_arg='schema_name',
323
+ independent_arg_value=schema_name)
324
+
264
325
  try:
265
- return _get_select_table_kind(schema_name, object_name, object_type)
326
+ return _get_select_table_kind(schema_name, object_name, object_type, datalake_name)
266
327
  except TeradataMlException:
267
328
  raise
268
329
  except OperationalError:
@@ -272,21 +333,49 @@ def db_list_tables(schema_name=None, object_name=None, object_type='all'):
272
333
  MessageCodes.LIST_DB_TABLES_FAILED) from err
273
334
 
274
335
 
275
- def _get_select_table_kind(schema_name, table_name, table_kind):
336
+ def _convert_sql_search_string_to_regex(sql_str):
337
+ """Internal function to convert SQL string matching patterns to python regex."""
338
+ if sql_str:
339
+ # sql_str[1:-1] Removes single quotes from sql_str.
340
+ sql_str = sql_str[1:-1]
341
+
342
+ # If '%' is specified in 'sql_str',
343
+ # then the '_' character is not evaluated for an arbitrary character.
344
+ if '%' in sql_str:
345
+ # Replace % with .* if not preceded by a backslash.
346
+ sql_str = re.sub(r'(?<!\\)%', r'.*', sql_str, flags=re.IGNORECASE)
347
+ # Remove the escape character for the replacements.
348
+ sql_str = sql_str.replace(r'\%', '%')
349
+ else:
350
+ # Replace _ with . if not preceded by a backslash.
351
+ sql_str = re.sub(r'(?<!\\)_', r'.', sql_str, flags=re.IGNORECASE)
352
+ # Remove the escape character for the replacements.
353
+ sql_str = sql_str.replace(r'\_', '_')
354
+
355
+ # Add boundaries if the string doesn't start or end with '.*' i.e. SQL '%'.
356
+ if not sql_str.startswith('.*'):
357
+ sql_str = '^' + sql_str # Anchor to the start of the string.
358
+ if not sql_str.endswith('.*'):
359
+ sql_str = sql_str + '$' # Anchor to the end of the string.
360
+ return sql_str
361
+
362
+
363
+ def _get_select_table_kind(schema_name, table_name, table_kind, datalake_name):
276
364
  """
277
- Get the list of the table names from the specified schema name.
365
+ Get the list of the table names from the specified schema name and datalake.
278
366
 
279
367
  PARAMETERS:
280
368
  schema_name - The Name of schema in the database. The default value is the current database name.
281
369
  table_name - The pattern to be used to filtering the table names from the database.
282
- The table name argument can contain '%' as pattern matching charecter.For example '%abc'
283
- will return all table names starting with any charecters and ending with abc.
370
+ The table name argument can contain '%' as pattern matching character.For example '%abc'
371
+ will return all table names starting with any characters and ending with abc.
284
372
  table_kind - The table kind to apply the filter. The valid values are 'all','table','view','volatile','temp'.
285
373
  all - list the all the table kinds.
286
374
  table - list only tables.
287
375
  view - list only views.
288
376
  volatile - list only volatile temp.
289
377
  temp - list all teradata ml temporary objects created in the specified database.
378
+ datalake_name - The name of datalake to search schema in.
290
379
  RETURNS:
291
380
  Panda's DataFrame - if the operation is successful.
292
381
 
@@ -301,60 +390,106 @@ def _get_select_table_kind(schema_name, table_name, table_kind):
301
390
  object_name_str = "'{0}'".format(table_name)
302
391
  object_table_kind = None
303
392
 
304
- # Check the schema name.
305
- if schema_name is None:
306
- schema_name = tdmlctx._get_current_databasename()
307
-
308
- # Check the table kind.
309
- if (table_kind == TeradataTableKindConstants.VOLATILE.value):
310
- query = SQLBundle._build_help_volatile_table()
311
- else:
312
- # Tablekind:
313
- # 'O' - stands for Table with no primary index and no partitioning
314
- # 'Q' - stands for Queue table
315
- # 'T' - stands for a Table with a primary index or primary AMP index, partitioning, or both.
316
- # Or a partitioned table with NoPI
317
- # 'V' - stands for View
318
- if (table_kind == TeradataTableKindConstants.TABLE.value):
319
- object_table_kind = "'{0}','{1}','{2}'".format('O', 'Q', 'T')
320
- elif (table_kind == TeradataTableKindConstants.VIEW.value):
321
- object_table_kind = "'{0}'".format('V')
322
- elif (table_kind == TeradataTableKindConstants.TEMP.value):
323
- if table_name is None:
324
- object_name_str = "'{0}'".format(TeradataTableKindConstants.ML_PATTERN.value)
325
- else:
326
- object_name_str = "'{0}','{1}'".format(table_name,
327
- TeradataTableKindConstants.ML_PATTERN.value)
393
+ # Tablekind:
394
+ # 'O' - stands for Table with no primary index and no partitioning
395
+ # 'Q' - stands for Queue table
396
+ # 'T' - stands for a Table with a primary index or primary AMP index, partitioning, or both.
397
+ # Or a partitioned table with NoPI
398
+ # 'V' - stands for View
399
+ if (table_kind == TeradataTableKindConstants.TABLE.value):
400
+ object_table_kind = ['O', 'Q', 'T']
401
+ elif (table_kind == TeradataTableKindConstants.VIEW.value):
402
+ object_table_kind = ['V']
403
+ elif (table_kind == TeradataTableKindConstants.TEMP.value):
404
+ if table_name is None:
405
+ object_name_str = "'{0}'".format(TeradataTableKindConstants.ML_PATTERN.value)
328
406
  else:
329
- object_table_kind = "'{0}','{1}','{2}','{3}'".format('O', 'Q', 'T', 'V')
330
- query = SQLBundle._build_select_table_kind(schema_name, object_name_str, object_table_kind)
331
-
332
- try:
333
- pddf = pd.read_sql(query, tdmlctx.td_connection.connection)
334
- # Check if all table kind is requested and add also volatile tables to the pdf.
335
- if (table_kind == TeradataTableKindConstants.ALL.value):
407
+ object_name_str = "'{0}','{1}'".format(table_name,
408
+ TeradataTableKindConstants.ML_PATTERN.value)
409
+ else:
410
+ object_table_kind = ['O', 'Q', 'T', 'V']
411
+
412
+ if datalake_name is None:
413
+ # Check the schema name.
414
+ if schema_name is None:
415
+ schema_name = tdmlctx._get_current_databasename()
416
+
417
+ # Create an empty dataframe with desired column name.
418
+ pddf = pd.DataFrame(columns=[TeradataTableKindConstants.REGULAR_TABLE_NAME.value])
419
+
420
+ # Check the table kind.
421
+ if table_kind != TeradataTableKindConstants.VOLATILE.value:
422
+ if object_table_kind is not None:
423
+ object_table_kind = ', '.join([f"'{value}'" for value in object_table_kind])
424
+ query = SQLBundle._build_select_table_kind(schema_name, object_name_str, object_table_kind)
425
+ pddf = pd.read_sql(query, tdmlctx.td_connection.connection)
426
+
427
+ # Check if all table kind or volatile table kind is requested.
428
+ # If so,add volatile tables to the pddf.
429
+ if table_kind == TeradataTableKindConstants.ALL.value or \
430
+ table_kind == TeradataTableKindConstants.VOLATILE.value:
431
+ # Create list of volatile tables.
336
432
  try:
337
- # Add volatile tables to all dataframe.
338
433
  vtquery = SQLBundle._build_help_volatile_table()
339
434
  vtdf = pd.read_sql(vtquery, tdmlctx.td_connection.connection)
340
435
  if not vtdf.empty:
436
+ # Volatile table query returns different column names.
437
+ # So, rename its column names to match with normal
438
+ # 'SELECT TABLENAME FROM DBC.TABLESV' query results.
341
439
  columns_dict = {TeradataTableKindConstants.VOLATILE_TABLE_NAME.value:
342
440
  TeradataTableKindConstants.REGULAR_TABLE_NAME.value}
343
441
  vtdf.rename(columns=columns_dict, inplace=True)
442
+ # Volatile table names might contain leading whitespaces. Remove those.
443
+ vtdf[TeradataTableKindConstants.REGULAR_TABLE_NAME.value] = vtdf[TeradataTableKindConstants.REGULAR_TABLE_NAME.value].str.strip()
444
+ # Filter volatile tables using table name pattern.
445
+ if object_name_str and (object_name_str := _convert_sql_search_string_to_regex(object_name_str)):
446
+ name_filter = vtdf[TeradataTableKindConstants.REGULAR_TABLE_NAME.value].str.strip().str.match(
447
+ object_name_str,
448
+ na=False,
449
+ flags=re.IGNORECASE)
450
+ vtdf = vtdf[name_filter]
451
+ # Concat existing list with volatile tables list.
344
452
  frames = [pddf, vtdf[[TeradataTableKindConstants.REGULAR_TABLE_NAME.value]]]
345
453
  pddf = pd.concat(frames)
346
454
  pddf.reset_index(drop=True, inplace=True)
347
455
  except Exception as err:
348
- # No volatle tables exist.
456
+ # No volatile tables exist.
349
457
  pass
350
- if (table_kind == TeradataTableKindConstants.VOLATILE.value):
351
- columns_dict = {TeradataTableKindConstants.VOLATILE_TABLE_NAME.value:
352
- TeradataTableKindConstants.REGULAR_TABLE_NAME.value}
353
- pddf.rename(columns=columns_dict, inplace=True)
354
- return pddf[[TeradataTableKindConstants.REGULAR_TABLE_NAME.value]]
355
458
  else:
356
459
  return pddf
357
- except Exception as err:
460
+ else:
461
+ # TODO: when OTF team enables VSD support for datalake tables
462
+ # with epic: https://teradata-pe.atlassian.net/browse/OTF-454,
463
+ # this can be changed to use VSD_tablesV table which is
464
+ # similar to DBC.TABLESV.
465
+ # For datalake tables' information we need to use help database and
466
+ # then apply filter for table kind and table substring.
467
+ # We can't use select from DBC.TABLESV.
468
+ sqlbundle = SQLBundle()
469
+ help_db_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_DATABASE)
470
+ pddf = pd.read_sql(help_db_sql.format(_get_quoted_object_name(schema_name=datalake_name,
471
+ object_name=schema_name)),
472
+ tdmlctx.td_connection.connection)
473
+
474
+ if object_name_str:
475
+ object_name_str = _convert_sql_search_string_to_regex(object_name_str)
476
+ if object_name_str:
477
+ name_filter = pddf['Table/View/Macro Name'].str.strip().str.match(object_name_str, na=False,
478
+ flags=re.IGNORECASE)
479
+ pddf = pddf[name_filter]
480
+
481
+ if object_table_kind is not None:
482
+ object_filter = pddf['Kind'].isin(object_table_kind)
483
+ pddf = pddf[object_filter]
484
+
485
+ columns_dict = {'Table/View/Macro Name':
486
+ TeradataTableKindConstants.REGULAR_TABLE_NAME.value}
487
+ pddf.rename(columns=columns_dict, inplace=True)
488
+
489
+ # Return only filtered columns.
490
+ if not pddf.empty:
491
+ return pddf[[TeradataTableKindConstants.REGULAR_TABLE_NAME.value]]
492
+ else:
358
493
  return pd.DataFrame()
359
494
 
360
495
 
@@ -444,6 +579,7 @@ def db_transaction(func):
444
579
  True
445
580
  >>>
446
581
  """
582
+
447
583
  def execute_transaction(*args, **kwargs):
448
584
  auto_commit_off = "{fn teradata_nativesql}{fn teradata_autocommit_off}"
449
585
  auto_commit_on = "{fn teradata_nativesql}{fn teradata_autocommit_on}"
@@ -479,6 +615,7 @@ def db_transaction(func):
479
615
 
480
616
  return execute_transaction
481
617
 
618
+
482
619
  def _execute_stored_procedure(function_call, fetchWarnings=True, expect_none_result=False):
483
620
  """
484
621
  DESCRIPTION:
@@ -576,7 +713,7 @@ def _get_function_call_as_string(sqlcFuncObj):
576
713
  return str(sqlcFuncObj.compile(**kw))
577
714
 
578
715
 
579
- def _get_quoted_object_name(schema_name, object_name):
716
+ def _get_quoted_object_name(schema_name, object_name, datalake=None):
580
717
  """
581
718
  DESCRIPTION:
582
719
  This function quotes and joins schema name to the object name which can either be table or a view.
@@ -585,12 +722,18 @@ def _get_quoted_object_name(schema_name, object_name):
585
722
  schema_name
586
723
  Required Argument.
587
724
  Specifies the schema name.
588
- Types: str
725
+ Type: str
589
726
 
590
727
  object_name
591
728
  Required Argument.
592
729
  Specifies the object name either table or view.
593
- Types: str
730
+ Type: str
731
+
732
+ datalake
733
+ Optional Argument.
734
+ Specifies the datalake name.
735
+ Default value: None
736
+ Type: str
594
737
 
595
738
  RAISES:
596
739
  None
@@ -612,6 +755,8 @@ def _get_quoted_object_name(schema_name, object_name):
612
755
  schema_name = tdp.quote(tdmlctx._get_current_databasename())
613
756
 
614
757
  quoted_object_name = "{0}.{1}".format(schema_name, tdp.quote(object_name))
758
+ if datalake is not None:
759
+ quoted_object_name = "{}.{}".format(tdp.quote(datalake), quoted_object_name)
615
760
  return quoted_object_name
616
761
 
617
762
 
@@ -712,7 +857,6 @@ def view_log(log_type="script", num_lines=1000, query_id=None, log_dir=None):
712
857
  # Validate num_lines is a positive integer.
713
858
  _Validators._validate_positive_int(num_lines, "num_lines")
714
859
 
715
-
716
860
  # Query for viewing last n lines of script log.
717
861
  view_log_query = TableOperatorConstants.SCRIPT_LOG_QUERY.value \
718
862
  .format(num_lines, configure.default_varchar_size)
@@ -733,8 +877,9 @@ def view_log(log_type="script", num_lines=1000, query_id=None, log_dir=None):
733
877
  err_msg = 'Please provide directory path instead of file path.'.format(
734
878
  log_dir)
735
879
  raise TeradataMlException(err_msg, MessageCodes.INPUT_FILE_NOT_FOUND)
736
- from teradataml.scriptmgmt.UserEnv import _get_auth_token, _get_ues_url, \
737
- _process_ues_response
880
+ from teradataml.scriptmgmt.UserEnv import (_get_auth_token,
881
+ _get_ues_url,
882
+ _process_ues_response)
738
883
  ues_url = _get_ues_url(logs=True, query_id=query_id)
739
884
  response = UtilFuncs._http_request(ues_url, headers=_get_auth_token())
740
885
  resp = _process_ues_response(api_name="view_log", response=response)
@@ -798,9 +943,10 @@ def _fetch_url_and_save(url, file_path):
798
943
  def _check_if_python_packages_installed():
799
944
  """
800
945
  DESCRIPTION:
801
- Function to set global variable 'python_packages_installed' to True
802
- or False based on whether the Vantage node has Python and add-on
803
- packages including pip3 installed.
946
+ Function to set the following global variables based on whether the Vantage node
947
+ has Python and add-on packages including pip3 installed.
948
+ - 'python_packages_installed' to True or False
949
+ - 'python_version_vantage' to the version of Python installed on Vantage.
804
950
 
805
951
  PARAMETERS:
806
952
  None.
@@ -814,14 +960,21 @@ def _check_if_python_packages_installed():
814
960
  EXAMPLES:
815
961
  _check_if_python_packages_installed()
816
962
  """
963
+ if tdmlctx.python_packages_installed:
964
+ # Skip check if Python and add-on packages are already installed and checked.
965
+ return
966
+
817
967
  # Check if Python interpreter and add-ons packages are installed or not.
818
968
  try:
819
969
  query = TableOperatorConstants.CHECK_PYTHON_INSTALLED.value.format(configure.indb_install_location)
820
- UtilFuncs._execute_query(query=query)
970
+ opt = UtilFuncs._execute_query(query=query)
971
+
972
+ python_version = opt[0][0].split(" -- ")[1].split(" ")[1].strip()
821
973
 
822
974
  # If query execution is successful, then Python and add-on packages are
823
975
  # present.
824
976
  tdmlctx.python_packages_installed = True
977
+ tdmlctx.python_version_vantage = python_version
825
978
  except Exception as err:
826
979
  # Raise Exception if the error message does not contain
827
980
  # "bash: pip3: command not found".
@@ -932,6 +1085,203 @@ def db_python_package_details(names=None):
932
1085
  return ret_val
933
1086
 
934
1087
 
1088
+ def _db_python_package_version_diff(packages=None, only_diff=True):
1089
+ """
1090
+ DESCRIPTION:
1091
+ Internal function to get the pandas dataframe containing the difference in the Python
1092
+ packages installed on Vantage and the packages mentioned in the argument "packages".
1093
+ Note:
1094
+ * Using this function is valid only when Python interpreter and add-on packages
1095
+ are installed on the Vantage node.
1096
+ * This function also checks for differences in Python packages versions given
1097
+ part of package name as string.
1098
+ * Returns pandas dataframe of only differences when the argument `only_diff` is set to
1099
+ True. Otherwise, returns all the packages.
1100
+
1101
+ PARAMETERS:
1102
+ packages:
1103
+ Required Argument.
1104
+ Specifies the name(s) of the Python package(s) for which the difference
1105
+ in the versions is to be fetched from Vantage.
1106
+ Note:
1107
+ * If this argument is None, all the packages installed on Vantage are considered.
1108
+ * If any package is present in Vantage but not in the current environment, then None
1109
+ is shown as the version of the package in the current environment.
1110
+ Types: str or list of str
1111
+
1112
+ only_diff:
1113
+ Optional Argument.
1114
+ Specifies whether to return only the differences in the versions of the packages
1115
+ installed on Vantage and the packages mentioned in the argument "packages".
1116
+ Default Value: True
1117
+
1118
+ RETURNS:
1119
+ pandas DataFrame
1120
+
1121
+ RAISES:
1122
+ TeradataMlException.
1123
+
1124
+ EXAMPLES:
1125
+ # Note:
1126
+ # These examples will work only when the Python packages are installed on Vantage.
1127
+
1128
+ # Example 1: Get the difference in the versions of Python packages 'dill' and 'matplotlib'
1129
+ # installed on Vantage.
1130
+ >>> _db_python_package_version_diff(["dill", "matplotlib"])
1131
+ package vantage local
1132
+ 0 dill 0.3.6 0.3.7
1133
+
1134
+ # Example 2: Get the difference in the versions of Python packages 'dill' and 'matplotlib'
1135
+ # installed on Vantage and 'only_diff' argument set to False.
1136
+ >>> _db_python_package_version_diff(["dill", "matplotlib"], only_diff=False)
1137
+ package vantage local
1138
+ 0 matplotlib-inline 0.1.6 0.1.6
1139
+ 1 dill 0.3.6 0.3.7
1140
+ 2 matplotlib 3.6.2 3.6.2
1141
+ """
1142
+ # Check if Python interpreter and add-on packages are installed or not.
1143
+ _check_if_python_packages_installed()
1144
+
1145
+ # Raise error if Python and add-on packages are not installed.
1146
+ if not tdmlctx.python_packages_installed:
1147
+ raise TeradataMlException(Messages.get_message(MessageCodes.PYTHON_NOT_INSTALLED),
1148
+ MessageCodes.PYTHON_NOT_INSTALLED)
1149
+
1150
+ # Installed packages dictionary.
1151
+ db_pkg_df = db_python_package_details(packages)
1152
+ if db_pkg_df is None:
1153
+ return None
1154
+
1155
+ pkgs_dict = {row.package: row.version for row in db_pkg_df.itertuples()}
1156
+
1157
+ from importlib.metadata import PackageNotFoundError, version
1158
+ diff_list = []
1159
+
1160
+ for pkg in pkgs_dict.keys():
1161
+ vantage_version = pkgs_dict.get(pkg)
1162
+ try:
1163
+ local_version = version(pkg)
1164
+ except PackageNotFoundError:
1165
+ # If package is not found in the current environment, then the local version is set to None.
1166
+ local_version = None
1167
+ except Exception as e:
1168
+ # Any other exception is raised.
1169
+ raise
1170
+
1171
+ if only_diff:
1172
+ if vantage_version != local_version:
1173
+ # Add to list only when the versions are different.
1174
+ diff_list.append([pkg, vantage_version, local_version])
1175
+ else:
1176
+ # Add to list all the packages and versions irrespective of the differences.
1177
+ diff_list.append([pkg, vantage_version, local_version])
1178
+
1179
+ return pd.DataFrame(diff_list, columns=["package", "vantage", "local"])
1180
+
1181
+
1182
+ @collect_queryband(queryband='PythonDiff')
1183
+ def db_python_version_diff():
1184
+ """
1185
+ DESCRIPTION:
1186
+ Function to get the difference of the Python intepreter major version installed on Vantage
1187
+ and the Python version used in the current environment.
1188
+
1189
+ Note:
1190
+ * Using this function is valid only when Python interpreter and add-on packages
1191
+ are installed on the Vantage node.
1192
+
1193
+ RETURNS:
1194
+ Empty dictionary when Python major version is same on Vantage and the current environment.
1195
+ Otherwise, returns a dictionary with the following keys:
1196
+ - 'vantage_version': Python major version installed on Vantage.
1197
+ - 'local_version': Python major version used in the current environment.
1198
+
1199
+ RAISES:
1200
+ TeradataMlException.
1201
+
1202
+ EXAMPLES:
1203
+ # Note:
1204
+ # These examples will work only when the Python packages are installed on Vantage.
1205
+
1206
+ # Example 1: Get the difference in the Python version installed on Vantage and the current environment.
1207
+ >>> db_python_version_diff()
1208
+ {"vantage_version": "3.7", "local_version": "3.8"}
1209
+ """
1210
+ # Check if Python interpretor and add-on packages are installed or not.
1211
+ _check_if_python_packages_installed()
1212
+
1213
+ # Raise error if Python and add-on packages are not installed.
1214
+ if not tdmlctx.python_packages_installed:
1215
+ raise TeradataMlException(Messages.get_message(MessageCodes.PYTHON_NOT_INSTALLED),
1216
+ MessageCodes.PYTHON_NOT_INSTALLED)
1217
+
1218
+ # Get major version of python installed on Vantage and the current environment.
1219
+ python_local = tdmlctx.python_version_local.rsplit(".", 1)[0]
1220
+ python_vantage = tdmlctx.python_version_vantage.rsplit(".", 1)[0]
1221
+
1222
+ if python_local != python_vantage:
1223
+ return {"vantage_version": python_vantage, "local_version": python_local}
1224
+
1225
+ return {}
1226
+
1227
+
1228
+ @collect_queryband(queryband='PkgDiff')
1229
+ def db_python_package_version_diff(packages=None):
1230
+ """
1231
+ DESCRIPTION:
1232
+ Function to get the difference of the Python packages installed on Vantage and
1233
+ in the current environment mentioned in the argument "packages".
1234
+
1235
+ Notes:
1236
+ * Using this function is valid only when Python interpreter and add-on packages
1237
+ are installed on the Vantage node.
1238
+ * This function also checks for differences in Python packages versions given
1239
+ part of package name as string.
1240
+
1241
+ PARAMETERS:
1242
+ packages:
1243
+ Optional Argument.
1244
+ Specifies the name(s) of the Python package(s) for which the difference
1245
+ in the versions is to be fetched from Vantage.
1246
+ Notes:
1247
+ * If this argument is None, all the packages installed on Vantage are considered.
1248
+ * If any package is present in Vantage but not in the current environment, then None
1249
+ is shown as the version of the package in the current environment.
1250
+ Types: str or list of str
1251
+
1252
+ RETURNS:
1253
+ pandas DataFrame
1254
+
1255
+ RAISES:
1256
+ TeradataMlException.
1257
+
1258
+ EXAMPLES:
1259
+ # Note:
1260
+ # These examples will work only when the Python packages are installed on Vantage.
1261
+
1262
+ # Example 1: Get the difference in the versions of Python package 'dill' installed on Vantage.
1263
+ >>> db_python_package_version_diff("dill")
1264
+ package vantage local
1265
+ 0 dill 0.10.0 0.11.2
1266
+
1267
+ # Example 2: Get the difference in the versions of all Python packages installed on Vantage.
1268
+ >>> db_python_package_version_diff()
1269
+ package vantage local
1270
+ 0 scikit-learn 1.3.3 0.24.2
1271
+ 1 dill 0.10.0 0.11.2
1272
+ ...
1273
+ 532 attrs 18.2.0 17.0.0
1274
+
1275
+ """
1276
+ # Validate arguments.
1277
+ __arg_info_matrix = []
1278
+ __arg_info_matrix.append(["packages", packages, True, (str, list), True])
1279
+
1280
+ _Validators._validate_function_arguments(arg_list=__arg_info_matrix)
1281
+
1282
+ return _db_python_package_version_diff(packages=packages)
1283
+
1284
+
935
1285
  def _create_table(table_name,
936
1286
  columns,
937
1287
  primary_index=None,
@@ -1060,7 +1410,7 @@ def _create_table(table_name,
1060
1410
  else:
1061
1411
  pti = pti.no_primary_index()
1062
1412
 
1063
- con_form=[]
1413
+ con_form = []
1064
1414
  foreign_constraints = []
1065
1415
  for c_name, parameters in kwargs.items():
1066
1416
  _Validators._validate_function_arguments([["constraint_type", c_name, True, str,
@@ -1097,7 +1447,7 @@ def _create_table(table_name,
1097
1447
  " columns.items()),{} teradatasql_post_create=pti,prefixes=prefix," \
1098
1448
  "schema=schema_name)".format("" if con_form is None else ",".join(con_form))
1099
1449
 
1100
- table=eval(table_str)
1450
+ table = eval(table_str)
1101
1451
  for foreign_constraint in foreign_constraints:
1102
1452
  table.append_constraint(foreign_constraint)
1103
1453
  table.create(bind=tdmlctx.get_context())
@@ -1107,7 +1457,8 @@ def _create_table(table_name,
1107
1457
  raise TeradataMlException(Messages.get_message(msg_code, "create table", str(err)), msg_code)
1108
1458
 
1109
1459
 
1110
- def _create_database(schema_name, size='10e6', spool_size=None):
1460
+ def _create_database(schema_name, size='10e6', spool_size=None,
1461
+ datalake=None, **kwargs):
1111
1462
  """
1112
1463
  DESCRIPTION:
1113
1464
  Internal function to create a database with the specified name and size.
@@ -1133,6 +1484,16 @@ def _create_database(schema_name, size='10e6', spool_size=None):
1133
1484
  Exponential notation can also be used.
1134
1485
  Types: str or int
1135
1486
 
1487
+ datalake:
1488
+ Optional Argument.
1489
+ Specifies the name of datalake to create database in.
1490
+ Types: str
1491
+
1492
+ kwargs:
1493
+ Optional Argument.
1494
+ Specifies keyword arguments which are used in DBPROPERTIES
1495
+ clause as key-value pair while creating datalake database.
1496
+
1136
1497
  RETURNS:
1137
1498
  bool
1138
1499
 
@@ -1140,11 +1501,29 @@ def _create_database(schema_name, size='10e6', spool_size=None):
1140
1501
  TeradataMlException.
1141
1502
 
1142
1503
  EXAMPLES:
1143
- >>> from teradataml.dbutils.dbutils import _create_database
1144
- >>> _create_database("db_name1", "10e5")
1504
+ >>> from teradataml.dbutils.dbutils import _create_database
1505
+ # Example 1: Create database.
1506
+ >>> _create_database("db_name1", "10e5")
1507
+
1508
+ # Example 2: Create database in datalake.
1509
+ >>> _create_database("otf_db_1", datalake="datalake_iceberg_glue")
1510
+
1511
+ # Example 3: Create database in datalake having DBPROPERTIES.
1512
+ >>> _create_database("otf_db", datalake="datalake_iceberg_glue",
1513
+ ... owner='tdml_user', other_property='some_value',
1514
+ ... other_property2=20, comment='Created by tdml_user')
1145
1515
  """
1146
- sql = "CREATE DATABASE {} FROM {} AS PERM = {}".format(
1147
- schema_name, tdmlctx._get_database_username(), size)
1516
+ if datalake:
1517
+ db_properties = []
1518
+ for key, val in kwargs.items():
1519
+ db_properties.append("'{}'='{}'".format(key, val))
1520
+
1521
+ sql = "CREATE DATABASE {}.{}{};".format(datalake, schema_name,
1522
+ ' DBPROPERTIES({})'.format(','.join(db_properties))
1523
+ if db_properties else '')
1524
+
1525
+ else:
1526
+ sql = "CREATE DATABASE {} FROM {} AS PERM = {}".format(schema_name, tdmlctx._get_database_username(), size)
1148
1527
 
1149
1528
  # If user pass spool size, create it with specified space.
1150
1529
  if spool_size:
@@ -1203,7 +1582,7 @@ def _update_data(update_columns_values, table_name, schema_name, datalake_name=N
1203
1582
 
1204
1583
  # If key_columns_values is passed, then prepare the SQL with where clause.
1205
1584
  # Else, simply update every thing.
1206
- schema_name = "{}.{}".format(datalake_name, schema_name) if datalake_name else schema_name
1585
+ qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
1207
1586
 
1208
1587
  get_str_ = lambda val: "'{}'".format(val) if isinstance(val, str) else val
1209
1588
  if update_conditions:
@@ -1220,14 +1599,14 @@ def _update_data(update_columns_values, table_name, schema_name, datalake_name=N
1220
1599
 
1221
1600
  where_clause = " AND ".join(where_)
1222
1601
 
1223
- sql = f"""UPDATE {schema_name}.{table_name} SET {update_clause}
1602
+ sql = f"""UPDATE {qualified_table_name} SET {update_clause}
1224
1603
  WHERE {where_clause}
1225
1604
  """
1226
1605
 
1227
- execute_sql(sql, (*update_values, ))
1606
+ execute_sql(sql, (*update_values,))
1228
1607
 
1229
1608
  else:
1230
- sql = f"""UPDATE {schema_name}.{table_name} SET {update_clause}"""
1609
+ sql = f"""UPDATE {qualified_table_name} SET {update_clause}"""
1231
1610
 
1232
1611
  execute_sql(sql, update_values)
1233
1612
  return True
@@ -1276,10 +1655,7 @@ def _insert_data(table_name, values, columns=None, schema_name=None, datalake_na
1276
1655
  >>> _insert_data("tbl", (1, 2, 3))
1277
1656
  """
1278
1657
  # Prepare the update clause.
1279
- if schema_name:
1280
- table_name = '"{}"."{}"'.format(schema_name, table_name)
1281
- if datalake_name:
1282
- table_name = '"{}"."{}"'.format(datalake_name, table_name)
1658
+ qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
1283
1659
 
1284
1660
  values = UtilFuncs._as_list(values)
1285
1661
 
@@ -1292,7 +1668,7 @@ def _insert_data(table_name, values, columns=None, schema_name=None, datalake_na
1292
1668
  columns = ""
1293
1669
  _q_marks = ["?"] * (len(values[0]))
1294
1670
 
1295
- sql = "insert into {} {} values ({});".format(table_name, columns, ", ".join(_q_marks))
1671
+ sql = "insert into {} {} values ({});".format(qualified_table_name, columns, ", ".join(_q_marks))
1296
1672
  execute_sql(sql, values)
1297
1673
 
1298
1674
  return True
@@ -1339,6 +1715,8 @@ def _upsert_data(update_columns_values,
1339
1715
  datalake_name:
1340
1716
  Optional Argument.
1341
1717
  Specifies the name of the datalake to look for "schema_name".
1718
+ Note:
1719
+ "schema_name" must be provided while using this argument.
1342
1720
  Types: str
1343
1721
 
1344
1722
  RETURNS:
@@ -1357,8 +1735,7 @@ def _upsert_data(update_columns_values,
1357
1735
  )
1358
1736
  """
1359
1737
  # If user passes datalake name, then append the same to schema name.
1360
- if datalake_name:
1361
- schema_name = "{}.{}".format(datalake_name, schema_name)
1738
+ qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
1362
1739
 
1363
1740
  # Prepare the update clause.
1364
1741
  update_clause = ", ".join(("{} = ?".format(col) for col in update_columns_values))
@@ -1373,12 +1750,13 @@ def _upsert_data(update_columns_values,
1373
1750
  insert_clause = "({}) values ({})".format(", ".join(insert_columns_values), insert_values_clause)
1374
1751
  insert_values = tuple((_value for _value in insert_columns_values.values()))
1375
1752
 
1376
- sql = f"""UPDATE {schema_name}.{table_name} SET {update_clause}
1753
+ sql = f"""UPDATE {qualified_table_name} SET {update_clause}
1377
1754
  WHERE {where_clause}
1378
- ELSE INSERT {schema_name}.{table_name} {insert_clause}
1755
+ ELSE INSERT {qualified_table_name} {insert_clause}
1379
1756
  """
1380
1757
  execute_sql(sql, (*update_values, *where_values, *insert_values))
1381
1758
 
1759
+
1382
1760
  def _delete_data(table_name, schema_name=None, datalake_name=None, delete_conditions=None):
1383
1761
  """
1384
1762
  DESCRIPTION:
@@ -1403,8 +1781,9 @@ def _delete_data(table_name, schema_name=None, datalake_name=None, delete_condit
1403
1781
 
1404
1782
  delete_conditions:
1405
1783
  Optional Argument.
1406
- Specifies the ColumnExpression to use for removing the data.
1407
- Types: ColumnExpression
1784
+ Specifies the ColumnExpression or dictionary containing key values
1785
+ pairs to use for removing the data.
1786
+ Types: ColumnExpression, dict
1408
1787
 
1409
1788
  RETURNS:
1410
1789
  int, specifies the number of records those are deleted.
@@ -1416,24 +1795,34 @@ def _delete_data(table_name, schema_name=None, datalake_name=None, delete_condit
1416
1795
  >>> from teradataml.dbutils.dbutils import _delete_data
1417
1796
  >>> _delete_data("tbl", "db_name1", delete_conditions={"column1": "value1"})
1418
1797
  """
1419
- if schema_name:
1420
- table_name = '"{}"."{}"'.format(schema_name, table_name)
1421
-
1422
- if datalake_name:
1423
- table_name = "{}.{}".format(datalake_name, table_name)
1424
-
1798
+ qualified_table_name = _get_quoted_object_name(schema_name, table_name, datalake_name)
1425
1799
  sqlbundle = SQLBundle()
1426
1800
 
1427
- sql = sqlbundle._get_sql_query(SQLConstants.SQL_DELETE_ALL_ROWS).format(table_name)
1801
+ sql = sqlbundle._get_sql_query(SQLConstants.SQL_DELETE_ALL_ROWS).format(qualified_table_name)
1428
1802
 
1429
1803
  # If condition exist, the prepare where clause.
1430
1804
  if delete_conditions:
1431
- where_clause = delete_conditions.compile()
1432
- sql = sqlbundle._get_sql_query(SQLConstants.SQL_DELETE_SPECIFIC_ROW).format(table_name, where_clause)
1805
+ from teradataml.dataframe.sql import _SQLColumnExpression
1806
+ if isinstance(delete_conditions, _SQLColumnExpression):
1807
+ where_clause = delete_conditions.compile()
1808
+ elif isinstance(delete_conditions, dict):
1809
+ get_str_ = lambda val: "'{}'".format(val) if isinstance(val, str) else val
1810
+ where_ = []
1811
+ for column, col_value in delete_conditions.items():
1812
+ if isinstance(col_value, list):
1813
+ col_value = ", ".join(get_str_(val) for val in col_value)
1814
+ col_value = "({})".format(col_value)
1815
+ where_.append("{} IN {}".format(column, col_value))
1816
+ else:
1817
+ where_.append("{} = {}".format(column, col_value))
1818
+ where_clause = " AND ".join(where_)
1819
+
1820
+ sql = sqlbundle._get_sql_query(SQLConstants.SQL_DELETE_SPECIFIC_ROW).format(qualified_table_name, where_clause)
1433
1821
 
1434
1822
  res = execute_sql(sql)
1435
1823
  return res.rowcount
1436
1824
 
1825
+
1437
1826
  @collect_queryband(queryband='LstKwrds')
1438
1827
  def list_td_reserved_keywords(key=None, raise_error=False):
1439
1828
  """
@@ -1498,6 +1887,7 @@ def list_td_reserved_keywords(key=None, raise_error=False):
1498
1887
  """
1499
1888
 
1500
1889
  from teradataml.dataframe.dataframe import DataFrame, in_schema
1890
+
1501
1891
  # Get the reserved keywords from the table
1502
1892
  reserved_keys = DataFrame(in_schema("SYSLIB", "SQLRestrictedWords"))
1503
1893
 
@@ -1515,10 +1905,10 @@ def list_td_reserved_keywords(key=None, raise_error=False):
1515
1905
  # Check if key contains Teradata reserved keyword or not.
1516
1906
  res_key = (k.upper() for k in key if k.upper() in reservered_words)
1517
1907
  res_key = list(res_key)
1518
- if len(res_key)>0:
1908
+ if len(res_key) > 0:
1519
1909
  if raise_error:
1520
1910
  raise TeradataMlException(Messages.get_message(MessageCodes.RESERVED_KEYWORD, res_key),
1521
- MessageCodes.RESERVED_KEYWORD)
1911
+ MessageCodes.RESERVED_KEYWORD)
1522
1912
  return True
1523
1913
  return False
1524
1914
 
@@ -1608,6 +1998,7 @@ def _execute_query_and_generate_pandas_df(query, index=None, **kwargs):
1608
1998
 
1609
1999
  return pandas_df
1610
2000
 
2001
+
1611
2002
  class _TDSessionParams:
1612
2003
  """
1613
2004
  A successfull connection through teradataml establishes a session with Vantage.
@@ -1615,6 +2006,7 @@ class _TDSessionParams:
1615
2006
  for parameter 'Session Time Zone'.
1616
2007
  This is an internal utility to store all session related parameters.
1617
2008
  """
2009
+
1618
2010
  def __init__(self, data):
1619
2011
  """
1620
2012
  Constructor to store columns and rows of session params.
@@ -1641,6 +2033,7 @@ class _TDSessionParams:
1641
2033
  return self.__session_params[parameter]
1642
2034
  raise AttributeError("'TDSessionParams' object has no attribute '{}'".format(parameter))
1643
2035
 
2036
+
1644
2037
  def set_session_param(name, value):
1645
2038
  """
1646
2039
  DESCRIPTION:
@@ -1816,15 +2209,16 @@ def set_session_param(name, value):
1816
2209
  [param[0] for param in result.description],
1817
2210
  [value for value in next(result)]
1818
2211
  ))
1819
- _InternalBuffer.add(session_params = _TDSessionParams(data))
2212
+ _InternalBuffer.add(session_params=_TDSessionParams(data))
1820
2213
  # Store function name of 'DEBUG_FUNCTION' used.
1821
- _InternalBuffer.add(function_name = value[0] if name.upper() == 'DEBUG_FUNCTION' else '')
2214
+ _InternalBuffer.add(function_name=value[0] if name.upper() == 'DEBUG_FUNCTION' else '')
1822
2215
 
1823
2216
  # Set the session parameter.
1824
2217
  execute_sql(getattr(SessionParamsSQL, name.upper()).format(*value))
1825
2218
 
1826
2219
  return True
1827
2220
 
2221
+
1828
2222
  def unset_session_param(name):
1829
2223
  """
1830
2224
  DESCRIPTION:
@@ -1868,7 +2262,7 @@ def unset_session_param(name):
1868
2262
  # unset_values stores params which are not available in _InternalBuffer, to unset create a dictionary
1869
2263
  # with param as key and unset param as value
1870
2264
  unset_values = {"CHARACTER_SET_UNICODE": "OFF", "DEBUG_FUNCTION": [_InternalBuffer.get('function_name'), "OFF"],
1871
- "ISOLATED_LOADING":"NO", "FUNCTION_TRACE":"SET SESSION FUNCTION TRACE OFF",
2265
+ "ISOLATED_LOADING": "NO", "FUNCTION_TRACE": "SET SESSION FUNCTION TRACE OFF",
1872
2266
  "JSON_IGNORE_ERRORS": "OFF", "QUERY_BAND": ["", "SESSION"]}
1873
2267
 
1874
2268
  # If 'name' in unset_values unset the params
@@ -1882,14 +2276,16 @@ def unset_session_param(name):
1882
2276
  return True
1883
2277
 
1884
2278
  previous_value = "{}".format(session_params[getattr(SessionParamsPythonNames, name.upper())]) \
1885
- if name.upper() != 'TIMEZONE' else "'{}'".format(session_params[getattr(SessionParamsPythonNames, name.upper())])
1886
-
2279
+ if name.upper() != 'TIMEZONE' else "'{}'".format(
2280
+ session_params[getattr(SessionParamsPythonNames, name.upper())])
2281
+
1887
2282
  if name.upper() == "ACCOUNT":
1888
2283
  previous_value = [previous_value, 'SESSION']
1889
2284
  set_session_param(name, previous_value)
1890
2285
 
1891
2286
  return True
1892
2287
 
2288
+
1893
2289
  class _Authorize:
1894
2290
  """ Parent class to either provide or revoke access on table(s). """
1895
2291
  _property = None