teradataml 20.0.0.5__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (119) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +306 -0
  3. teradataml/__init__.py +1 -1
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +162 -76
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/__init__.py +2 -0
  8. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  9. teradataml/analytics/json_parser/metadata.py +22 -4
  10. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  12. teradataml/analytics/sqle/__init__.py +3 -0
  13. teradataml/analytics/utils.py +59 -11
  14. teradataml/automl/__init__.py +2369 -464
  15. teradataml/automl/autodataprep/__init__.py +15 -0
  16. teradataml/automl/custom_json_utils.py +184 -112
  17. teradataml/automl/data_preparation.py +113 -58
  18. teradataml/automl/data_transformation.py +154 -53
  19. teradataml/automl/feature_engineering.py +113 -53
  20. teradataml/automl/feature_exploration.py +548 -25
  21. teradataml/automl/model_evaluation.py +260 -32
  22. teradataml/automl/model_training.py +399 -206
  23. teradataml/clients/auth_client.py +10 -6
  24. teradataml/clients/keycloak_client.py +165 -0
  25. teradataml/common/aed_utils.py +11 -2
  26. teradataml/common/bulk_exposed_utils.py +4 -2
  27. teradataml/common/constants.py +72 -2
  28. teradataml/common/exceptions.py +32 -0
  29. teradataml/common/garbagecollector.py +50 -21
  30. teradataml/common/messagecodes.py +73 -1
  31. teradataml/common/messages.py +27 -1
  32. teradataml/common/sqlbundle.py +25 -7
  33. teradataml/common/utils.py +210 -22
  34. teradataml/context/aed_context.py +16 -10
  35. teradataml/context/context.py +37 -9
  36. teradataml/data/Employee.csv +5 -0
  37. teradataml/data/Employee_Address.csv +4 -0
  38. teradataml/data/Employee_roles.csv +5 -0
  39. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  40. teradataml/data/byom_example.json +5 -0
  41. teradataml/data/creditcard_data.csv +284618 -0
  42. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  43. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  44. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  45. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  46. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  47. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  48. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  55. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  56. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  57. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  58. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  59. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  60. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  61. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  62. teradataml/data/load_example_data.py +29 -11
  63. teradataml/data/pattern_matching_data.csv +11 -0
  64. teradataml/data/payment_fraud_dataset.csv +10001 -0
  65. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  66. teradataml/data/teradataml_example.json +75 -1
  67. teradataml/data/url_data.csv +10 -9
  68. teradataml/dataframe/copy_to.py +715 -55
  69. teradataml/dataframe/dataframe.py +2115 -97
  70. teradataml/dataframe/dataframe_utils.py +66 -28
  71. teradataml/dataframe/functions.py +1130 -2
  72. teradataml/dataframe/setop.py +4 -1
  73. teradataml/dataframe/sql.py +710 -1039
  74. teradataml/dbutils/dbutils.py +470 -35
  75. teradataml/dbutils/filemgr.py +1 -1
  76. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  77. teradataml/hyperparameter_tuner/utils.py +4 -2
  78. teradataml/lib/aed_0_1.dll +0 -0
  79. teradataml/lib/libaed_0_1.dylib +0 -0
  80. teradataml/lib/libaed_0_1.so +0 -0
  81. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  82. teradataml/opensource/_base.py +7 -1
  83. teradataml/options/configure.py +20 -4
  84. teradataml/scriptmgmt/UserEnv.py +247 -36
  85. teradataml/scriptmgmt/lls_utils.py +140 -39
  86. teradataml/sdk/README.md +79 -0
  87. teradataml/sdk/__init__.py +4 -0
  88. teradataml/sdk/_auth_modes.py +422 -0
  89. teradataml/sdk/_func_params.py +487 -0
  90. teradataml/sdk/_json_parser.py +453 -0
  91. teradataml/sdk/_openapi_spec_constants.py +249 -0
  92. teradataml/sdk/_utils.py +236 -0
  93. teradataml/sdk/api_client.py +900 -0
  94. teradataml/sdk/constants.py +62 -0
  95. teradataml/sdk/modelops/__init__.py +98 -0
  96. teradataml/sdk/modelops/_client.py +409 -0
  97. teradataml/sdk/modelops/_constants.py +304 -0
  98. teradataml/sdk/modelops/models.py +2308 -0
  99. teradataml/sdk/spinner.py +107 -0
  100. teradataml/series/series.py +12 -7
  101. teradataml/store/feature_store/constants.py +601 -234
  102. teradataml/store/feature_store/feature_store.py +2886 -616
  103. teradataml/store/feature_store/mind_map.py +639 -0
  104. teradataml/store/feature_store/models.py +5831 -214
  105. teradataml/store/feature_store/utils.py +390 -0
  106. teradataml/table_operators/query_generator.py +4 -21
  107. teradataml/table_operators/table_operator_util.py +1 -1
  108. teradataml/table_operators/templates/dataframe_register.template +6 -2
  109. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  110. teradataml/utils/docstring.py +527 -0
  111. teradataml/utils/dtypes.py +95 -1
  112. teradataml/utils/internal_buffer.py +2 -2
  113. teradataml/utils/utils.py +41 -3
  114. teradataml/utils/validators.py +699 -18
  115. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +312 -2
  116. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +119 -87
  117. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  118. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  119. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
@@ -126,6 +126,8 @@ class ErrorInfoCodes(Enum):
126
126
  CANNOT_USE_TOGETHER_WITH = 'TDML_2042'
127
127
  TABLE_DOES_NOT_EXIST = 'TDML_2046'
128
128
  DEPENDENT_METHOD = 'TDML_2113'
129
+ TDMLDF_COLUMN_IN_ARG_FOUND = 'TDML_2114'
130
+ EITHER_ANY_ARGUMENT = 'TDML_2115'
129
131
 
130
132
  # Reserved for Generic Error Messages: 2121 - 2199
131
133
  RESERVED_KEYWORD = 'TDML_2121'
@@ -227,6 +229,38 @@ class ErrorInfoCodes(Enum):
227
229
 
228
230
  # OpenAF Error codes starting from 2551 - Reserved till 2560.
229
231
  SET_REQUIRED_PARAMS = 'TDML_2551'
232
+ INVALID_USAGE = 'TDML_2552'
233
+
234
+ # Error codes for OTF. Reserved till 2570.
235
+ OTF_TABLE_REQUIRED = 'TDML_2561'
236
+
237
+ # Rest Exceptions. Reserved for 2570 - 2580
238
+ REST_HTTP_ERROR = 'TDML_2570'
239
+ REST_AUTH_MISSING_ARG = 'TDML_2571'
240
+ REST_NOT_CONFIGURED = 'TDML_2572'
241
+ REST_DEVICE_CODE_NO_BOTH = 'TDML_2573'
242
+ REST_DEVICE_CODE_GEN_FAILED = 'TDML_2574'
243
+ REST_DEVICE_CODE_AUTH_FAILED = 'TDML_2575'
244
+
245
+ # Python SDK Error codes starting from 2580 - Reserved till 2590.
246
+ INFO_NOT_PROVIDED_USE_DEFAULT = 'TDML_W_2580' # Logger warning.
247
+
248
+ # EFS Error codes starting from 2600 - Reserved till 2650.
249
+ EFS_COMPONENT_NOT_EXIST = 'TDML_2600'
250
+ EFS_INVALID_PROCESS_TYPE = 'TDML_2601'
251
+ EFS_INVALID_FEATURE_TYPE = 'TDML_2602'
252
+ EFS_FEATURE_IN_DATASET = 'TDML_2603'
253
+ EFS_FEATURE_IN_CATALOG = 'TDML_2604'
254
+ EFS_ENTITY_IN_CATALOG = 'TDML_2605'
255
+ DF_DUPLICATE_VALUES = 'TDML_2606'
256
+ DF_NULL_VALUES = 'TDML_2607'
257
+ EFS_FEATURE_ENTITY_MISMATCH = 'TDML_2608'
258
+ FEATURES_ARCHIVED = 'TDML_2609'
259
+ EFS_DELETE_BEFORE_ARCHIVE = 'TDML_2610'
260
+ EFS_OBJ_IN_FEATURE_PROCESS = 'TDML_2611'
261
+ EFS_OBJECT_NOT_EXIST = 'TDML_2612'
262
+ EFS_OBJECT_IN_OTHER_DOMAIN = 'TDML_2613'
263
+
230
264
 
231
265
  class MessageCodes(Enum):
232
266
  """
@@ -373,7 +407,7 @@ class MessageCodes(Enum):
373
407
  VANTAGE_WARNING = "Following warning raised from Vantage with warning code: {}\n{}"
374
408
  FASTLOAD_FAILS = "fastload() failed to load pandas dataframe to Teradata Vantage."
375
409
  REMOVE_FILE_FAILED = "Failed to remove {} from Teradata Vantage"
376
- INPUT_FILE_NOT_FOUND = "Input file '{}' not found. Please check the file path."
410
+ INPUT_FILE_NOT_FOUND = "Input file(s) '{}' not found. Please check the file path(s)."
377
411
  INSTALL_FILE_FAILED = "File '{}' cannot be installed."
378
412
  REPLACE_FILE_FAILED = "Unable to replace '{}'"
379
413
  URL_UNREACHABLE = "URL '{}' is unreachable."
@@ -396,6 +430,7 @@ class MessageCodes(Enum):
396
430
  NO_ENVIRONMENT_FOUND = "No {} environment(s) found."
397
431
  UNSUPPORTED_FILE_EXTENSION = "Unsupported file extension specified. Supported file extensions is/are {}."
398
432
  FILE_EMPTY = "Input file {} is empty."
433
+ EITHER_ANY_ARGUMENT = "Provide either {} argument(s)."
399
434
 
400
435
  PYTHON_NOT_INSTALLED = "Python is not installed on Vantage. " \
401
436
  "Please install Python interpreter and add-on packages on Vantage."
@@ -442,3 +477,40 @@ class MessageCodes(Enum):
442
477
  "explicitly passed to function or specified using a configuration file, or setting up " \
443
478
  "the environment variables."
444
479
  DEPENDENT_METHOD = "Method(s) {} must be called before calling '{}'."
480
+ TDMLDF_COLUMN_IN_ARG_FOUND = "Column '{}' provided in '{}' argument, exist in {} {}."
481
+ INVALID_USAGE = "Invalid usage of {0} {1}. Use {0} {1} only {2}."
482
+ REST_HTTP_ERROR = "Failed to run rest API:\n{}"
483
+ REST_AUTH_MISSING_ARG = "For '{}' authentication, '{}' is/are not provided in config file or "\
484
+ "environment variable or through constructor argument 'auth'."
485
+ REST_NOT_CONFIGURED = "The argument '{}' is not set. {} endpoint not configured.\n" \
486
+ "Try (re)copy the CLI configuration from {} UI -> Session Details -> CLI Config."
487
+ REST_DEVICE_CODE_NO_BOTH = "Token does not contain access_token or refresh_token. Received token: {}"
488
+ REST_DEVICE_CODE_GEN_FAILED = "Error generating the device code. Received code: {}."
489
+ REST_DEVICE_CODE_AUTH_FAILED = "Error authenticating the device code.\n{}."
490
+
491
+ INFO_NOT_PROVIDED_USE_DEFAULT = "{} is not provided in path '{}' method '{}' for operationID '{}' using default {}."
492
+ OTF_TABLE_REQUIRED = "{} is supported only with OTF table."
493
+ EFS_COMPONENT_NOT_EXIST = "{} '{}' does not exist. Use {} to list valid {}."
494
+ EFS_INVALID_PROCESS_TYPE = "Invalid process type '{}' detected. Valid types are: {}."
495
+ EFS_INVALID_FEATURE_TYPE = ("Invalid feature type '{}' detected for feature ''. "
496
+ "Features cannot be ingested for types: {}.")
497
+ EFS_FEATURE_IN_DATASET = ("Feature(s) {} is/are associated with an existing dataset(s) {}. "
498
+ "Feature(s) can be {} only when they are not associated with any dataset. "
499
+ "Use 'DatasetCatalog.list_datasets()' to see the list of features associated with datasets.")
500
+ EFS_FEATURE_IN_CATALOG = ("Feature '{}' exists in feature catalog. "
501
+ "Delete the feature first using FeatureCatalog.delete_features().")
502
+ EFS_ENTITY_IN_CATALOG = ("Entity '{}' is associated with feature(s) {} in Feature catalog. "
503
+ "Delete these features using FeatureCatalog.delete_features().")
504
+ DF_DUPLICATE_VALUES = "Duplicate {} are not allowed. Found the duplicate value(s) {}."
505
+ DF_NULL_VALUES = ("Null value(s) are not allowed in {} while {}. "
506
+ "Found the null value(s) {}.")
507
+ EFS_FEATURE_ENTITY_MISMATCH = ("Feature(s) {} is/are associated with entities {}. One cannot "
508
+ "ingest same feature for another entity in the same data domain. "
509
+ "Either choose a different feature name or choose a different data domain.")
510
+ FEATURES_ARCHIVED = "Feature(s) {} is/are archived. {}"
511
+ EFS_DELETE_BEFORE_ARCHIVE = ("{0} '{1}' is not archived. Archive the {0} before deleting it."
512
+ "Use 'FeatureStore.archive_{2}()' to archive the {0}.")
513
+ EFS_OBJ_IN_FEATURE_PROCESS = ("{0} '{1}' is associated with {2}. {0} can be modified only when it is "
514
+ "not associated with {2}. Archive the {3} using {4} and try again.")
515
+ EFS_OBJECT_NOT_EXIST = "{} with {} does not exist in data domain '{}'."
516
+ EFS_OBJECT_IN_OTHER_DOMAIN = "{} with {} does not exist in data domain '{}'. It exists in other data domain(s): {}."
@@ -196,7 +196,33 @@ class Messages():
196
196
  [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE],
197
197
  [ErrorInfoCodes.SET_REQUIRED_PARAMS, MessageCodes.SET_REQUIRED_PARAMS],
198
198
  [ErrorInfoCodes.MISSING_ARGS, MessageCodes.CONNECTION_PARAMS],
199
- [ErrorInfoCodes.DEPENDENT_METHOD, MessageCodes.DEPENDENT_METHOD]
199
+ [ErrorInfoCodes.DEPENDENT_METHOD, MessageCodes.DEPENDENT_METHOD],
200
+ [ErrorInfoCodes.TDMLDF_COLUMN_IN_ARG_FOUND, MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND],
201
+ [ErrorInfoCodes.INVALID_USAGE, MessageCodes.INVALID_USAGE],
202
+ [ErrorInfoCodes.DEPENDENT_METHOD, MessageCodes.DEPENDENT_METHOD],
203
+ [ErrorInfoCodes.REST_HTTP_ERROR, MessageCodes.REST_HTTP_ERROR],
204
+ [ErrorInfoCodes.REST_AUTH_MISSING_ARG, MessageCodes.REST_AUTH_MISSING_ARG],
205
+ [ErrorInfoCodes.REST_NOT_CONFIGURED, MessageCodes.REST_NOT_CONFIGURED],
206
+ [ErrorInfoCodes.REST_DEVICE_CODE_NO_BOTH, MessageCodes.REST_DEVICE_CODE_NO_BOTH],
207
+ [ErrorInfoCodes.REST_DEVICE_CODE_GEN_FAILED, MessageCodes.REST_DEVICE_CODE_GEN_FAILED],
208
+ [ErrorInfoCodes.REST_DEVICE_CODE_AUTH_FAILED, MessageCodes.REST_DEVICE_CODE_AUTH_FAILED],
209
+ [ErrorInfoCodes.INFO_NOT_PROVIDED_USE_DEFAULT, MessageCodes.INFO_NOT_PROVIDED_USE_DEFAULT],
210
+ [ErrorInfoCodes.OTF_TABLE_REQUIRED, MessageCodes.OTF_TABLE_REQUIRED],
211
+ [ErrorInfoCodes.EFS_COMPONENT_NOT_EXIST, MessageCodes.EFS_COMPONENT_NOT_EXIST],
212
+ [ErrorInfoCodes.EFS_INVALID_PROCESS_TYPE, MessageCodes.EFS_INVALID_PROCESS_TYPE],
213
+ [ErrorInfoCodes.EFS_FEATURE_IN_DATASET, MessageCodes.EFS_FEATURE_IN_DATASET],
214
+ [ErrorInfoCodes.EFS_FEATURE_IN_CATALOG, MessageCodes.EFS_FEATURE_IN_CATALOG],
215
+ [ErrorInfoCodes.EFS_ENTITY_IN_CATALOG, MessageCodes.EFS_ENTITY_IN_CATALOG],
216
+ [ErrorInfoCodes.DF_DUPLICATE_VALUES, MessageCodes.DF_DUPLICATE_VALUES],
217
+ [ErrorInfoCodes.DF_NULL_VALUES, MessageCodes.DF_NULL_VALUES],
218
+ [ErrorInfoCodes.EFS_FEATURE_ENTITY_MISMATCH, MessageCodes.EFS_FEATURE_ENTITY_MISMATCH],
219
+ [ErrorInfoCodes.FEATURES_ARCHIVED, MessageCodes.FEATURES_ARCHIVED],
220
+ [ErrorInfoCodes.EFS_DELETE_BEFORE_ARCHIVE, MessageCodes.EFS_DELETE_BEFORE_ARCHIVE],
221
+ [ErrorInfoCodes.EFS_OBJ_IN_FEATURE_PROCESS, MessageCodes.EFS_OBJ_IN_FEATURE_PROCESS],
222
+ [ErrorInfoCodes.EFS_OBJECT_NOT_EXIST, MessageCodes.EFS_OBJECT_NOT_EXIST],
223
+ [ErrorInfoCodes.EFS_OBJECT_IN_OTHER_DOMAIN, MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN],
224
+ [ErrorInfoCodes.EITHER_ANY_ARGUMENT, MessageCodes.EITHER_ANY_ARGUMENT],
225
+
200
226
  ]
201
227
 
202
228
  @staticmethod
@@ -70,8 +70,14 @@ class SQLBundle:
70
70
  [SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"],
71
71
  [SQLConstants.SQL_SELECT_COLUMNNAMES_WITH_WHERE, "sel {0} from {1} where {2}"],
72
72
  [SQLConstants.SQL_HELP_DATABASE, "HELP DATABASE {0}"],
73
- [SQLConstants.SQL_HELP_DATALAKE, "HELP DATALAKE {0}"]
74
-
73
+ [SQLConstants.SQL_HELP_DATALAKE, "HELP DATALAKE {0}"],
74
+ [SQLConstants.SQL_INSERT_INTO_TABLE_VALUES_WITH_COLUMN_NAMES, "insert into {0} ({1}) values({2})"],
75
+ [SQLConstants.SQL_HELP_DATALAKE, "HELP DATALAKE {0}"],
76
+ [SQLConstants.SQL_TD_OTF_METADATA, "SELECT * FROM {0}(ON ({1})) D;"],
77
+ [SQLConstants.SQL_TD_OTF_SNAPSHOT, "SELECT * FROM {0} FOR SNAPSHOT AS OF {1};"],
78
+ [SQLConstants.SQL_LIST_TRIGGERS, "SELECT TriggerName FROM DBC.TRIGGERSV WHERE DatabaseName = '{0}' AND TriggerName LIKE '{1}'"],
79
+ [SQLConstants.SQL_SHOW_TABLE, "SHOW TABLE {0}"],
80
+ [SQLConstants.SQL_SHOW_VIEW, "SHOW VIEW {0}"]
75
81
  ]
76
82
  self._add_sql_version()
77
83
 
@@ -157,7 +163,7 @@ class SQLBundle:
157
163
  return sqlbundle._get_sql_query(SQLConstants.SQL_BASE_QUERY).format(name)
158
164
 
159
165
  @staticmethod
160
- def _build_create_view(view_name, select_expression):
166
+ def _build_create_view(view_name, select_expression, lock_rows=False):
161
167
  """
162
168
  Builds a CREATE VIEW DDL statement.
163
169
  For Example,
@@ -166,6 +172,11 @@ class SQLBundle:
166
172
  PARAMETERS:
167
173
  view_name - Viewname to be created
168
174
  select_expression - A SQL from which a view is to be created. (SELECT query)
175
+ lock_rows - When set to True, teradataml DataFrame locks the corresponding row(s)
176
+ in underlying table(s) while accessing the data. Otherwise,
177
+ teradataml DataFrame access the data without locking the rows.
178
+ Default is False.
179
+
169
180
 
170
181
  RETURNS:
171
182
  A CREATE VIEW DDL statement
@@ -179,6 +190,8 @@ class SQLBundle:
179
190
  """
180
191
  sqlbundle = SQLBundle()
181
192
  query = sqlbundle._get_sql_query(SQLConstants.SQL_CREATE_VIEW)
193
+ if lock_rows:
194
+ select_expression = "LOCKING ROW FOR ACCESS {}".format(select_expression)
182
195
  return query.format(view_name, select_expression)
183
196
 
184
197
  @staticmethod
@@ -635,7 +648,7 @@ class SQLBundle:
635
648
  return ddlstmt.format(tablename, columns_datatypes)
636
649
 
637
650
  @staticmethod
638
- def _build_insert_into_table_records(tablename, columns):
651
+ def _build_insert_into_table_records(tablename, columns, column_names=None):
639
652
  """
640
653
  Builds a prepared statement with parameter markers for a table.
641
654
  This is an internal function.
@@ -643,6 +656,7 @@ class SQLBundle:
643
656
  PARAMETERS:
644
657
  tablename - Table name to insert data.
645
658
  columns - The parameter markers for the prepared statement
659
+ column_names - The column names to be inserted.
646
660
 
647
661
  RETURNS:
648
662
  Returns a prepared statement.
@@ -652,11 +666,15 @@ class SQLBundle:
652
666
 
653
667
  EXAMPLES:
654
668
  preprdstmt = SQLBundle.SQL_INSERT_INTO_TABLE_VALUES('mytab', '?, ?')
655
-
669
+ preprdstmt = SQLBundle.SQL_INSERT_INTO_TABLE_VALUES_WITH_COLUMN_NAMES('mytab', 'column1, column2', '?, ?')
670
+
656
671
  """
657
672
  sqlbundle = SQLBundle()
658
- query = sqlbundle._get_sql_query(SQLConstants.SQL_INSERT_INTO_TABLE_VALUES)
659
- return query.format(tablename, columns)
673
+ sqlkey = (SQLConstants.SQL_INSERT_INTO_TABLE_VALUES_WITH_COLUMN_NAMES
674
+ if column_names else SQLConstants.SQL_INSERT_INTO_TABLE_VALUES)
675
+ query = sqlbundle._get_sql_query(sqlkey)
676
+ return (query.format(tablename, column_names, columns)
677
+ if column_names else query.format(tablename, columns))
660
678
 
661
679
  @staticmethod
662
680
  def _build_delete_all_rows_from_table(tablename):
@@ -13,11 +13,12 @@ by other classes which can be reused according to the need.
13
13
  Add all the common functions in this class like creating temporary table names, getting
14
14
  the datatypes etc.
15
15
  """
16
+ import datetime
16
17
  import json
17
18
  import os
18
19
  import re
19
20
  import time
20
- import uuid
21
+ import uuid, hashlib
21
22
  import warnings
22
23
  from functools import reduce
23
24
  from inspect import getsource
@@ -39,8 +40,9 @@ from teradatasqlalchemy.types import (BIGINT, BLOB, BYTE, BYTEINT, CHAR, CLOB,
39
40
  from teradataml import _version
40
41
  from teradataml.common import td_coltype_code_to_tdtype
41
42
  from teradataml.common.constants import (HTTPRequest, PTITableConstants,
42
- PythonTypes, TeradataConstants,
43
- TeradataReservedKeywords,
43
+ PythonTypes, SQLConstants,
44
+ TeradataConstants,
45
+ TeradataReservedKeywords, TeradataTableKindConstants,
44
46
  TeradataTypes)
45
47
  from teradataml.common.exceptions import TeradataMlException
46
48
  from teradataml.common.garbagecollector import GarbageCollector
@@ -280,26 +282,37 @@ class UtilFuncs():
280
282
  RAISES:
281
283
 
282
284
  EXAMPLES:
283
- new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
284
- new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
285
+ >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
286
+ >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
285
287
  table_type = TeradataConstants.TERADATA_VIEW)
286
- new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
288
+ >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
287
289
  table_type = TeradataConstants.TERADATA_TABLE)
290
+ # Example when use_short_object_name is set to True
291
+ >>> from teradataml.options.configure import configure
292
+ >>> configure.use_short_object_name = True
293
+ >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
288
294
 
289
295
  Output:
290
296
  tdml_temp_table__1517501990393350 (or)
291
297
  tdqg.tdml_temp_table__1517501990393350 (or)
292
298
  tdml_temp_table__from_pandas_1517501990393350 (or)
293
- tdqg.tdml_temp_table__from_pandas_1517501990393350
299
+ tdqg.tdml_temp_table__from_pandas_1517501990393350 (or)
300
+ ml__1749637109887272
294
301
  """
295
302
  # Number of seconds since Jan 1, 1970 00:00:00
296
303
  timestamp = time.time()
304
+ use_short_name = configure.use_short_object_name
297
305
  tabname = "ml_"
298
306
  random_string = "{}{}".format(floor(timestamp / 1000000),
299
307
  floor(timestamp % 1000000 * 1000000 +
300
308
  int(str(uuid.uuid4().fields[-1])[:10])))
301
- if prefix is not None:
309
+
310
+ # Append prefix only if use_short_object_name is False and prefix is provided.
311
+ if (not use_short_name) and (prefix is not None):
302
312
  tabname = "{}_{}".format(tabname, prefix)
313
+ # Append prefix "tdml" when use_short_object_name is True and random string is of length 15.
314
+ elif use_short_name and (len(random_string)==15):
315
+ tabname = "tdml"
303
316
 
304
317
  tabname = "{}_{}".format(tabname, random_string)
305
318
 
@@ -309,7 +322,8 @@ class UtilFuncs():
309
322
  tabname = "\"{}\".\"{}\"".format(_get_user(), tabname)
310
323
  return tabname
311
324
 
312
- if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
325
+ if (not use_short_name) and (configure.temp_object_type == TeradataConstants.
326
+ TERADATA_VOLATILE_TABLE):
313
327
  from teradataml.context.context import _get_user
314
328
  return "\"{}\".\"{}_{}\"".format(_get_user(), "vt", tabname)
315
329
 
@@ -653,13 +667,17 @@ class UtilFuncs():
653
667
 
654
668
  @staticmethod
655
669
  @collect_queryband(queryband='CreateView')
656
- def _create_view(view_name, query):
670
+ def _create_view(view_name, query, lock_rows=False):
657
671
  """
658
672
  Create a view from the given query.
659
673
 
660
674
  PARAMETERS:
661
675
  view_name - View name
662
676
  query - SQL query
677
+ lock_rows - When set to True, teradataml DataFrame locks the corresponding row(s)
678
+ in underlying table(s) while accessing the data. Otherwise,
679
+ teradataml DataFrame access the data without locking the rows.
680
+ Default is False.
663
681
 
664
682
  RAISES
665
683
 
@@ -669,7 +687,8 @@ class UtilFuncs():
669
687
  EXAMPLES:
670
688
  UtilFuncs._create_view(view_name, "select * from table_name")
671
689
  """
672
- crt_view = SQLBundle._build_create_view(view_name, query)
690
+
691
+ crt_view = SQLBundle._build_create_view(view_name, query, lock_rows)
673
692
  try:
674
693
  UtilFuncs._execute_ddl_statement(crt_view)
675
694
  return True
@@ -928,9 +947,13 @@ class UtilFuncs():
928
947
  EXAMPLES:
929
948
  UtilFuncs._get_help_vtablenames()
930
949
  """
931
- vtables = UtilFuncs._execute_query(SQLBundle._build_help_volatile_table())
932
- if vtables:
933
- return list(map(str.strip, filter(None, vtables[0])))
950
+ vtables = UtilFuncs._execute_query(SQLBundle._build_help_volatile_table(), fetchWarnings=True)
951
+ if vtables and vtables[0] and vtables[1]:
952
+ rows, columns = vtables
953
+ key = TeradataTableKindConstants.VOLATILE_TABLE_NAME.value
954
+ # Find the index of the column matching the table name
955
+ col_idx = columns.index(key)
956
+ return [row[col_idx].strip() for row in rows if row[col_idx]]
934
957
  return []
935
958
 
936
959
  @staticmethod
@@ -1310,7 +1333,8 @@ class UtilFuncs():
1310
1333
 
1311
1334
  return False
1312
1335
 
1313
- def _is_ascii(col_lst):
1336
+ @staticmethod
1337
+ def _is_non_ascii(col_lst):
1314
1338
  """
1315
1339
  Description:
1316
1340
  Check if the specified string in col_lst has non-ASCII characters in it.
@@ -1569,7 +1593,7 @@ class UtilFuncs():
1569
1593
  return UtilFuncs._teradata_quote_arg(args_list, quote, False)
1570
1594
 
1571
1595
  @staticmethod
1572
- def _get_metaexpr_using_columns(nodeid, column_info, with_engine=False, is_persist=False):
1596
+ def _get_metaexpr_using_columns(nodeid, column_info, with_engine=False, is_persist=False, **kw):
1573
1597
  """
1574
1598
  This internal function takes in input node ID and column information in zipped lists format
1575
1599
  to return metaexpr with or without engine.
@@ -1610,7 +1634,7 @@ class UtilFuncs():
1610
1634
  ouptut_table = Table(db_table_name, meta,
1611
1635
  *(Column(col_name, col_type) for col_name, col_type in column_info),
1612
1636
  schema=db_schema)
1613
- return _MetaExpression(ouptut_table, is_persist=is_persist)
1637
+ return _MetaExpression(ouptut_table, is_persist=is_persist, **kw)
1614
1638
 
1615
1639
  @staticmethod
1616
1640
  def _get_metaexpr_using_parent_metaexpr(nodeid, metaexpr):
@@ -1648,7 +1672,8 @@ class UtilFuncs():
1648
1672
  col_names.append(meta_cols[col_name].name)
1649
1673
  col_types.append(meta_cols[col_name].type)
1650
1674
 
1651
- return UtilFuncs._get_metaexpr_using_columns(nodeid, zip(col_names, col_types))
1675
+ return UtilFuncs._get_metaexpr_using_columns(nodeid, zip(col_names, col_types),
1676
+ datalake=metaexpr.datalake)
1652
1677
 
1653
1678
  @staticmethod
1654
1679
  def _create_table_using_columns(table_name, columns_datatypes, pti_clause=None, storage=None):
@@ -2670,7 +2695,10 @@ class UtilFuncs():
2670
2695
  user_function_code = ""
2671
2696
  for func in user_functions:
2672
2697
  # Get the source code of the user function.
2673
- func = getsource(func)
2698
+ # Note that, checking for lambda function is required for teradatamlspk UDFs
2699
+ # If the function is a lambda function, get the source code from __source__.
2700
+ func = getsource(func) if func.__code__.co_name != "<lambda>" else func.__source__
2701
+
2674
2702
  # If the function have any extra space in the beginning remove it.
2675
2703
  func = func.lstrip()
2676
2704
  # Function can have decorator,e.g. udf as decorator, remove it.
@@ -2753,7 +2781,8 @@ class UtilFuncs():
2753
2781
  MessageCodes.PYTHON_VERSION_MISMATCH_OAF)
2754
2782
  else:
2755
2783
  from teradataml.context import context as tdmlctx
2756
- from teradataml.dbutils.dbutils import (db_python_version_diff, set_session_param)
2784
+ from teradataml.dbutils.dbutils import (db_python_version_diff,
2785
+ set_session_param)
2757
2786
  set_session_param("searchuifdbpath",
2758
2787
  UtilFuncs._get_dialect_quoted_name(tdmlctx._get_current_databasename()))
2759
2788
  if len(db_python_version_diff()) > 0:
@@ -2798,7 +2827,8 @@ class UtilFuncs():
2798
2827
  """
2799
2828
 
2800
2829
  # Check if OSML required packages are verified or not.
2801
- from teradataml.opensource._constants import _packages_verified_in_vantage
2830
+ from teradataml.opensource._constants import \
2831
+ _packages_verified_in_vantage
2802
2832
  _is_packages_verfied_in_vantage = _packages_verified_in_vantage.get(
2803
2833
  func, None)
2804
2834
  if _is_packages_verfied_in_vantage:
@@ -2855,7 +2885,8 @@ class UtilFuncs():
2855
2885
 
2856
2886
  else:
2857
2887
  # Check if the versions of Python packages are consistent between Vantage and local.
2858
- from teradataml.dbutils.dbutils import _db_python_package_version_diff
2888
+ from teradataml.dbutils.dbutils import \
2889
+ _db_python_package_version_diff
2859
2890
  all_package_versions = _db_python_package_version_diff(packages, only_diff=False)
2860
2891
  package_difference = \
2861
2892
  all_package_versions[all_package_versions.vantage != all_package_versions.local]
@@ -2922,6 +2953,163 @@ class UtilFuncs():
2922
2953
  tdp = preparer(td_dialect)
2923
2954
  return tdp.quote(object_name)
2924
2955
 
2956
+ @staticmethod
2957
+ def _get_hash_value(identifier):
2958
+ """
2959
+ DESCRIPTION:
2960
+ Function to get the hash value of the identifier.
2961
+
2962
+ PARAMETERS:
2963
+ identifier
2964
+ Required Argument.
2965
+ Specifies the identifier to be hashed.
2966
+ Type: str
2967
+
2968
+ RAISES:
2969
+ None
2970
+
2971
+ RETURNS:
2972
+ Hash value of the identifier.
2973
+
2974
+ EXAMPLES:
2975
+ UtilFuncs._get_hash_value(identifier = "tdml.alice")
2976
+
2977
+ OUTPUT:
2978
+ a6c64c2c_58e9_5060_b811_00839ea493ed
2979
+ """
2980
+ # Generate a hash value using SHA-256
2981
+ hash_object = hashlib.sha256(identifier.encode())
2982
+ hash_hex = hash_object.hexdigest()
2983
+
2984
+ # Format the hash value to match the desired format
2985
+ formatted_hash = f"{hash_hex[:8]}_{hash_hex[8:12]}_{hash_hex[12:16]}_{hash_hex[16:20]}_{hash_hex[20:32]}"
2986
+
2987
+ return formatted_hash
2988
+
2989
+ @staticmethod
2990
+ def _get_http_status_phrases_description():
2991
+ """
2992
+ DESCRIPTION:
2993
+ Function to get phrases and description for all HTTP status codes.
2994
+
2995
+ PARAMETERS:
2996
+ None
2997
+
2998
+ RETURNS:
2999
+ dict
3000
+
3001
+ EXAMPLES:
3002
+ >>> UtilFuncs._get_http_status_phrases_description()
3003
+ """
3004
+ from http import HTTPStatus
3005
+ return {status.value: {"phrase": status.phrase, "description": status.description} \
3006
+ for status in HTTPStatus}
3007
+
3008
+ @staticmethod
3009
+ def _get_time_formatted_string(period):
3010
+ """
3011
+ DESCRIPTION:
3012
+ Converts a string representing Period to the formatted TIMESTAMP/DATE string for snapshot queries.
3013
+
3014
+ PARAMETERS:
3015
+ period:
3016
+ Required Argument.
3017
+ Specifies the period string to be converted.
3018
+ Types: str
3019
+
3020
+ RETURNS:
3021
+ The formatted TIMESTAMP/DATE string.
3022
+
3023
+ RAISES:
3024
+ ValueError.
3025
+
3026
+ EXAMPLES:
3027
+ >>> UtilFuncs._get_time_formatted_string('2025-06-01 12:00:00.123')
3028
+ """
3029
+ # Try to parse as datetime string
3030
+ try:
3031
+ for fmt in ["%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d"]:
3032
+ try:
3033
+ dt = datetime.datetime.strptime(period, fmt)
3034
+ # If input had microseconds, preserve them
3035
+ if "%f" in fmt and "." in period:
3036
+ # Remove trailing zeros and dot if needed
3037
+ result = "TIMESTAMP'{}'".format(dt.strftime("%Y-%m-%d %H:%M:%S.%f").rstrip("0").rstrip("."))
3038
+ elif "%S" in fmt:
3039
+ result = "TIMESTAMP'{}'".format(dt.strftime("%Y-%m-%d %H:%M:%S"))
3040
+ else:
3041
+ result = "DATE'{}'".format(dt.strftime("%Y-%m-%d"))
3042
+ return result
3043
+ except ValueError:
3044
+ continue
3045
+ raise ValueError(f"Unrecognized period format: {period}")
3046
+ except Exception as e:
3047
+ raise ValueError(f"Could not convert period: {period}") from e
3048
+
3049
+ @staticmethod
3050
+ def extract_table_names_from_query(query):
3051
+ """
3052
+ Extracts all table/view names from FROM, JOIN, and ON-AS clauses in a SQL query.
3053
+ Handles nested queries and captures subqueries in ON (...), and ON <table> AS <alias>.
3054
+ """
3055
+ # Regex for FROM, JOIN, and ON ... AS ... clauses
3056
+ # This is a simplification; for production, use a SQL parser.
3057
+ table_names = set()
3058
+ # FROM ... (possibly with nested SELECT)
3059
+ for match in re.finditer(r'from\s+([^\s\(\)]+)', query, re.IGNORECASE):
3060
+ table_names.add(match.group(1).strip())
3061
+ # JOIN ... (possibly with nested SELECT)
3062
+ for match in re.finditer(r'join\s+([^\s\(\)]+)', query, re.IGNORECASE):
3063
+ table_names.add(match.group(1).strip())
3064
+ # ON ( ... ) AS ... Nested Query in ON Clause.
3065
+ for match in re.finditer(r'ON\s+\(([^)]+)\)\s+AS\s+["\']?\w+["\']?', query, re.IGNORECASE):
3066
+ table_names.update(UtilFuncs.extract_table_names_from_query(match.group(1)))
3067
+ # ON <table> AS <alias> (no parentheses)
3068
+ for match in re.finditer(r'ON\s+(["\']?\w+["\']?(?:\.["\']?\w+["\']?)*)\s+AS\s+["\']?\w+["\']?', query, re.IGNORECASE):
3069
+ table_names.add(match.group(1).strip())
3070
+ return list(table_names)
3071
+
3072
+ @staticmethod
3073
+ def _get_normalize_and_deduplicate_columns(columns):
3074
+ """
3075
+ DESCRIPTION:
3076
+ Function that normalizes and deduplicates a list of column names.
3077
+ This function processes the "columns", which can be a list of column names
3078
+ as strings or ColumnExpression, or a single column name/ColumnExpression.
3079
+ It extracts the column names, removes duplicates while preserving order,
3080
+ and returns the resulting list of unique column names.
3081
+
3082
+ PARAMETERS:
3083
+ columns:
3084
+ Required Argument.
3085
+ Specifies the column.
3086
+ Types: str, ColumnExpression, list of str or ColumnExpression
3087
+
3088
+ RAISES:
3089
+ None
3090
+
3091
+ RETURNS:
3092
+ list
3093
+
3094
+ EXAMPLES:
3095
+ >>> load_examples_data('dataframe', 'sales')
3096
+ >>> df = DataFrame('sales')
3097
+ >>> columns = [df.Jan, 'Jan', 'Feb', df.Feb, 'Mar']
3098
+ >>> UtilFuncs._get_normalize_and_deduplicate_columns(columns)
3099
+ ['Jan', 'Feb', 'Mar']
3100
+
3101
+ """
3102
+ columns_list = []
3103
+ seen = set()
3104
+
3105
+ for column in (columns if isinstance(columns, list) else [columns]):
3106
+ name = column if isinstance(column, str) else column.name
3107
+ if name not in seen:
3108
+ seen.add(name)
3109
+ columns_list.append(name)
3110
+
3111
+ return columns_list
2925
3112
 
3113
+ # Keeping at the end to avoid circular dependency
2926
3114
  from teradataml.common.aed_utils import AedUtils
2927
3115
  from teradataml.dbutils.filemgr import remove_file
@@ -115,20 +115,26 @@ class AEDContext:
115
115
  """
116
116
  # Define extension to load AED library depending on the OS platform.
117
117
  os_type = platform.system()
118
+ arch_suffix = ""
118
119
  if (os_type == "Windows"):
119
- self.extension = "dll"
120
- self.lib_name = "aed"
120
+ extension = "dll"
121
+ lib_name = "aed"
121
122
  elif (os_type == "Darwin"):
122
- self.extension = "dylib"
123
- self.lib_name = "libaed"
123
+ extension = "dylib"
124
+ lib_name = "libaed"
124
125
  else:
125
- self.extension = "so"
126
- self.lib_name = "libaed"
126
+ # Linux case
127
+ extension = "so"
128
+ lib_name = "libaed"
129
+ # Only apply architecture suffix for aarch64/ARM64
130
+ machine = platform.uname().machine
131
+ if machine == "aarch64":
132
+ arch_suffix = "_" + machine
133
+
127
134
  # TODO:: Use logger when it is available.
128
- libPathName = os.path.join(os.sep,
129
- os.path.dirname(os.path.abspath(__file__)),
130
- "../lib", #"Debug",
131
- "{0}_0_1.{1}".format(self.lib_name, self.extension))
135
+ from teradataml import _TDML_DIRECTORY
136
+ libPathName = os.path.join(os.sep, _TDML_DIRECTORY, "lib",
137
+ "{0}_0_1{1}.{2}".format(lib_name, arch_suffix, extension))
132
138
  try:
133
139
  elecommon = cdll.LoadLibrary(libPathName)
134
140
  except Exception as err: