teradataml 20.0.0.5__py3-none-any.whl → 20.0.0.7__py3-none-any.whl
This diff compares two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +1 -1
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +162 -76
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/__init__.py +2 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
- teradataml/analytics/json_parser/metadata.py +22 -4
- teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
- teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
- teradataml/analytics/sqle/__init__.py +3 -0
- teradataml/analytics/utils.py +59 -11
- teradataml/automl/__init__.py +2369 -464
- teradataml/automl/autodataprep/__init__.py +15 -0
- teradataml/automl/custom_json_utils.py +184 -112
- teradataml/automl/data_preparation.py +113 -58
- teradataml/automl/data_transformation.py +154 -53
- teradataml/automl/feature_engineering.py +113 -53
- teradataml/automl/feature_exploration.py +548 -25
- teradataml/automl/model_evaluation.py +260 -32
- teradataml/automl/model_training.py +399 -206
- teradataml/clients/auth_client.py +10 -6
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/common/aed_utils.py +11 -2
- teradataml/common/bulk_exposed_utils.py +4 -2
- teradataml/common/constants.py +72 -2
- teradataml/common/exceptions.py +32 -0
- teradataml/common/garbagecollector.py +50 -21
- teradataml/common/messagecodes.py +73 -1
- teradataml/common/messages.py +27 -1
- teradataml/common/sqlbundle.py +25 -7
- teradataml/common/utils.py +210 -22
- teradataml/context/aed_context.py +16 -10
- teradataml/context/context.py +37 -9
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/byom_example.json +5 -0
- teradataml/data/creditcard_data.csv +284618 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/byom/onnxembeddings.json +1 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/load_example_data.py +29 -11
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/teradataml_example.json +75 -1
- teradataml/data/url_data.csv +10 -9
- teradataml/dataframe/copy_to.py +715 -55
- teradataml/dataframe/dataframe.py +2115 -97
- teradataml/dataframe/dataframe_utils.py +66 -28
- teradataml/dataframe/functions.py +1130 -2
- teradataml/dataframe/setop.py +4 -1
- teradataml/dataframe/sql.py +710 -1039
- teradataml/dbutils/dbutils.py +470 -35
- teradataml/dbutils/filemgr.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +456 -142
- teradataml/hyperparameter_tuner/utils.py +4 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/opensource/_base.py +7 -1
- teradataml/options/configure.py +20 -4
- teradataml/scriptmgmt/UserEnv.py +247 -36
- teradataml/scriptmgmt/lls_utils.py +140 -39
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +900 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +409 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/series/series.py +12 -7
- teradataml/store/feature_store/constants.py +601 -234
- teradataml/store/feature_store/feature_store.py +2886 -616
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +5831 -214
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/query_generator.py +4 -21
- teradataml/table_operators/table_operator_util.py +1 -1
- teradataml/table_operators/templates/dataframe_register.template +6 -2
- teradataml/table_operators/templates/dataframe_udf.template +6 -2
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +95 -1
- teradataml/utils/internal_buffer.py +2 -2
- teradataml/utils/utils.py +41 -3
- teradataml/utils/validators.py +699 -18
- {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +312 -2
- {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +119 -87
- {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/common/messagecodes.py
CHANGED

@@ -126,6 +126,8 @@ class ErrorInfoCodes(Enum):
     CANNOT_USE_TOGETHER_WITH = 'TDML_2042'
     TABLE_DOES_NOT_EXIST = 'TDML_2046'
     DEPENDENT_METHOD = 'TDML_2113'
+    TDMLDF_COLUMN_IN_ARG_FOUND = 'TDML_2114'
+    EITHER_ANY_ARGUMENT = 'TDML_2115'

     # Reserved for Generic Error Messages: 2121 - 2199
     RESERVED_KEYWORD = 'TDML_2121'
@@ -227,6 +229,38 @@ class ErrorInfoCodes(Enum):

     # OpenAF Error codes starting from 2551 - Reserved till 2560.
     SET_REQUIRED_PARAMS = 'TDML_2551'
+    INVALID_USAGE = 'TDML_2552'
+
+    # Error codes for OTF. Reserved till 2570.
+    OTF_TABLE_REQUIRED = 'TDML_2561'
+
+    # Rest Excceptions. Reserved for 2570 - 2580
+    REST_HTTP_ERROR = 'TDML_2570'
+    REST_AUTH_MISSING_ARG = 'TDML_2571'
+    REST_NOT_CONFIGURED = 'TDML_2572'
+    REST_DEVICE_CODE_NO_BOTH = 'TDML_2573'
+    REST_DEVICE_CODE_GEN_FAILED = 'TDML_2574'
+    REST_DEVICE_CODE_AUTH_FAILED = 'TDML_2575'
+
+    # Python SDK Error codes starting from 2580 - Reserved till 2590.
+    INFO_NOT_PROVIDED_USE_DEFAULT = 'TDML_W_2580' # Logger warning.
+
+    # EFS Error codes starting from 2600 - Reserved till 2650.
+    EFS_COMPONENT_NOT_EXIST = 'TDML_2600'
+    EFS_INVALID_PROCESS_TYPE = 'TDML_2601'
+    EFS_INVALID_FEATURE_TYPE = 'TDML_2602'
+    EFS_FEATURE_IN_DATASET = 'TDML_2603'
+    EFS_FEATURE_IN_CATALOG = 'TDML_2604'
+    EFS_ENTITY_IN_CATALOG = 'TDML_2605'
+    DF_DUPLICATE_VALUES = 'TDML_2606'
+    DF_NULL_VALUES = 'TDML_2607'
+    EFS_FEATURE_ENTITY_MISMATCH = 'TDML_2608'
+    FEATURES_ARCHIVED = 'TDML_2609'
+    EFS_DELETE_BEFORE_ARCHIVE = 'TDML_2610'
+    EFS_OBJ_IN_FEATURE_PROCESS = 'TDML_2611'
+    EFS_OBJECT_NOT_EXIST = 'TDML_2612'
+    EFS_OBJECT_IN_OTHER_DOMAIN = 'TDML_2613'
+

 class MessageCodes(Enum):
     """
@@ -373,7 +407,7 @@ class MessageCodes(Enum):
     VANTAGE_WARNING = "Following warning raised from Vantage with warning code: {}\n{}"
     FASTLOAD_FAILS = "fastload() failed to load pandas dataframe to Teradata Vantage."
     REMOVE_FILE_FAILED = "Failed to remove {} from Teradata Vantage"
-    INPUT_FILE_NOT_FOUND = "Input file '{}' not found. Please check the file path."
+    INPUT_FILE_NOT_FOUND = "Input file(s) '{}' not found. Please check the file path(s)."
     INSTALL_FILE_FAILED = "File '{}' cannot be installed."
     REPLACE_FILE_FAILED = "Unable to replace '{}'"
     URL_UNREACHABLE = "URL '{}' is unreachable."
@@ -396,6 +430,7 @@ class MessageCodes(Enum):
     NO_ENVIRONMENT_FOUND = "No {} environment(s) found."
     UNSUPPORTED_FILE_EXTENSION = "Unsupported file extension specified. Supported file extensions is/are {}."
     FILE_EMPTY = "Input file {} is empty."
+    EITHER_ANY_ARGUMENT = "Provide either {} argument(s)."

     PYTHON_NOT_INSTALLED = "Python is not installed on Vantage. " \
                            "Please install Python interpreter and add-on packages on Vantage."
@@ -442,3 +477,40 @@ class MessageCodes(Enum):
                          "explicitly passed to function or specified using a configuration file, or setting up " \
                          "the environment variables."
     DEPENDENT_METHOD = "Method(s) {} must be called before calling '{}'."
+    TDMLDF_COLUMN_IN_ARG_FOUND = "Column '{}' provided in '{}' argument, exist in {} {}."
+    INVALID_USAGE = "Invalid usage of {0} {1}. Use {0} {1} only {2}."
+    REST_HTTP_ERROR = "Failed to run rest API:\n{}"
+    REST_AUTH_MISSING_ARG = "For '{}' authentication, '{}' is/are not provided in config file or " \
+                            "environment variable or through constructor argument 'auth'."
+    REST_NOT_CONFIGURED = "The argument '{}' is not set. {} endpoint not configured.\n" \
+                          "Try (re)copy the CLI configuration from {} UI -> Session Details -> CLI Config."
+    REST_DEVICE_CODE_NO_BOTH = "Token does not contain access_token or refresh_token. Received token: {}"
+    REST_DEVICE_CODE_GEN_FAILED = "Error generating the device code. Received code: {}."
+    REST_DEVICE_CODE_AUTH_FAILED = "Error authenticating the device code.\n{}."
+
+    INFO_NOT_PROVIDED_USE_DEFAULT = "{} is not provided in path '{}' method '{}' for operationID '{}' using default {}."
+    OTF_TABLE_REQUIRED = "{} is supported only with OTF table."
+    EFS_COMPONENT_NOT_EXIST = "{} '{}' does not exist. Use {} to list valid {}."
+    EFS_INVALID_PROCESS_TYPE = "Invalid process type '{}' detected. Valid types are: {}."
+    EFS_INVALID_FEATURE_TYPE = ("Invalid feature type '{}' detected for feature ''. "
+                                "Features cannot ignest for types: {}.")
+    EFS_FEATURE_IN_DATASET = ("Feature(s) {} is/are associated with an existing dataset(s) {}. "
+                              "Feature(s) can be {} only when they are not associated with any dataset. "
+                              "Use 'DatasetCatalog.list_datasets()' to see the list of features associated with datasets.")
+    EFS_FEATURE_IN_CATALOG = ("Feature '{}' exists in feature catalog. "
+                              "Delete the feature first using FeatureCatalog.delete_features().")
+    EFS_ENTITY_IN_CATALOG = ("Entity '{}' is associated with feature(s) {} in Feature catalog. "
+                             "Delete these features using FeatureCatalog.delete_features().")
+    DF_DUPLICATE_VALUES = "Duplicate {} are not allowed. Found the duplicate value(s) {}."
+    DF_NULL_VALUES = ("Null value(s) are not allowed in {} while {}. "
+                      "Found the null value(s) {}.")
+    EFS_FEATURE_ENTITY_MISMATCH = ("Feature(s) {} is/are associated with entities {}. One cannot "
+                                   "ingest same feature for another entity in the same data domain. "
+                                   "Either choose a different feature name or choose a different data domain.")
+    FEATURES_ARCHIVED = "Feature(s) {} is/are archived. {}"
+    EFS_DELETE_BEFORE_ARCHIVE = ("{0} '{1}' is not archived. Archive the {0} before deleting it."
+                                 "Use 'FeatureStore.archive_{2}()' to archive the {0}.")
+    EFS_OBJ_IN_FEATURE_PROCESS = ("{0} '{1}' is associated with {2}. {0} can be modified only when it is "
+                                  "not associated with {2}. Archive the {3} using {4} and try again.")
+    EFS_OBJECT_NOT_EXIST = "{} with {} does not exist in data domain '{}'."
+    EFS_OBJECT_IN_OTHER_DOMAIN = "{} with {} does not exist in data domain '{}'. It exists in other data domain(s): {}."
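The new MessageCodes entries are plain str.format templates. A minimal sketch of how one of them renders; the argument text is hypothetical, only the template comes from the hunk above:

    # Hypothetical arguments; the template text is from MessageCodes above.
    template = "Provide either {} argument(s)."
    print(template.format("'data' or 'query'"))
    # Provide either 'data' or 'query' argument(s).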
teradataml/common/messages.py
CHANGED
@@ -196,7 +196,33 @@ class Messages():
         [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE],
         [ErrorInfoCodes.SET_REQUIRED_PARAMS, MessageCodes.SET_REQUIRED_PARAMS],
         [ErrorInfoCodes.MISSING_ARGS, MessageCodes.CONNECTION_PARAMS],
-        [ErrorInfoCodes.DEPENDENT_METHOD, MessageCodes.DEPENDENT_METHOD]
+        [ErrorInfoCodes.DEPENDENT_METHOD, MessageCodes.DEPENDENT_METHOD],
+        [ErrorInfoCodes.TDMLDF_COLUMN_IN_ARG_FOUND, MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND],
+        [ErrorInfoCodes.INVALID_USAGE, MessageCodes.INVALID_USAGE],
+        [ErrorInfoCodes.DEPENDENT_METHOD, MessageCodes.DEPENDENT_METHOD],
+        [ErrorInfoCodes.REST_HTTP_ERROR, MessageCodes.REST_HTTP_ERROR],
+        [ErrorInfoCodes.REST_AUTH_MISSING_ARG, MessageCodes.REST_AUTH_MISSING_ARG],
+        [ErrorInfoCodes.REST_NOT_CONFIGURED, MessageCodes.REST_NOT_CONFIGURED],
+        [ErrorInfoCodes.REST_DEVICE_CODE_NO_BOTH, MessageCodes.REST_DEVICE_CODE_NO_BOTH],
+        [ErrorInfoCodes.REST_DEVICE_CODE_GEN_FAILED, MessageCodes.REST_DEVICE_CODE_GEN_FAILED],
+        [ErrorInfoCodes.REST_DEVICE_CODE_AUTH_FAILED, MessageCodes.REST_DEVICE_CODE_AUTH_FAILED],
+        [ErrorInfoCodes.INFO_NOT_PROVIDED_USE_DEFAULT, MessageCodes.INFO_NOT_PROVIDED_USE_DEFAULT],
+        [ErrorInfoCodes.OTF_TABLE_REQUIRED, MessageCodes.OTF_TABLE_REQUIRED],
+        [ErrorInfoCodes.EFS_COMPONENT_NOT_EXIST, MessageCodes.EFS_COMPONENT_NOT_EXIST],
+        [ErrorInfoCodes.EFS_INVALID_PROCESS_TYPE, MessageCodes.EFS_INVALID_PROCESS_TYPE],
+        [ErrorInfoCodes.EFS_FEATURE_IN_DATASET, MessageCodes.EFS_FEATURE_IN_DATASET],
+        [ErrorInfoCodes.EFS_FEATURE_IN_CATALOG, MessageCodes.EFS_FEATURE_IN_CATALOG],
+        [ErrorInfoCodes.EFS_ENTITY_IN_CATALOG, MessageCodes.EFS_ENTITY_IN_CATALOG],
+        [ErrorInfoCodes.DF_DUPLICATE_VALUES, MessageCodes.DF_DUPLICATE_VALUES],
+        [ErrorInfoCodes.DF_NULL_VALUES, MessageCodes.DF_NULL_VALUES],
+        [ErrorInfoCodes.EFS_FEATURE_ENTITY_MISMATCH, MessageCodes.EFS_FEATURE_ENTITY_MISMATCH],
+        [ErrorInfoCodes.FEATURES_ARCHIVED, MessageCodes.FEATURES_ARCHIVED],
+        [ErrorInfoCodes.EFS_DELETE_BEFORE_ARCHIVE, MessageCodes.EFS_DELETE_BEFORE_ARCHIVE],
+        [ErrorInfoCodes.EFS_OBJ_IN_FEATURE_PROCESS, MessageCodes.EFS_OBJ_IN_FEATURE_PROCESS],
+        [ErrorInfoCodes.EFS_OBJECT_NOT_EXIST, MessageCodes.EFS_OBJECT_NOT_EXIST],
+        [ErrorInfoCodes.EFS_OBJECT_IN_OTHER_DOMAIN, MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN],
+        [ErrorInfoCodes.EITHER_ANY_ARGUMENT, MessageCodes.EITHER_ANY_ARGUMENT],
+
     ]

     @staticmethod
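Each pair binds an ErrorInfoCodes member to its MessageCodes template. A small sketch of the lookup this table enables, using one pair copied from above (the table's internal attribute is not named here, so the pairs are inlined):

    # Sketch only; both enums are real per this diff's messagecodes.py.
    from teradataml.common.messagecodes import ErrorInfoCodes, MessageCodes

    pairs = [[ErrorInfoCodes.OTF_TABLE_REQUIRED, MessageCodes.OTF_TABLE_REQUIRED]]
    lookup = {code: msg for code, msg in pairs}
    # "Snapshot read" is a hypothetical feature name.
    print(lookup[ErrorInfoCodes.OTF_TABLE_REQUIRED].value.format("Snapshot read"))
    # Snapshot read is supported only with OTF table.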
teradataml/common/sqlbundle.py
CHANGED
@@ -70,8 +70,14 @@ class SQLBundle:
             [SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"],
             [SQLConstants.SQL_SELECT_COLUMNNAMES_WITH_WHERE, "sel {0} from {1} where {2}"],
             [SQLConstants.SQL_HELP_DATABASE, "HELP DATABASE {0}"],
-            [SQLConstants.SQL_HELP_DATALAKE, "HELP DATALAKE {0}"]
-
+            [SQLConstants.SQL_HELP_DATALAKE, "HELP DATALAKE {0}"],
+            [SQLConstants.SQL_INSERT_INTO_TABLE_VALUES_WITH_COLUMN_NAMES, "insert into {0} ({1}) values({2})"],
+            [SQLConstants.SQL_HELP_DATALAKE, "HELP DATALAKE {0}"],
+            [SQLConstants.SQL_TD_OTF_METADATA, "SELECT * FROM {0}(ON ({1})) D;"],
+            [SQLConstants.SQL_TD_OTF_SNAPSHOT, "SELECT * FROM {0} FOR SNAPSHOT AS OF {1};"],
+            [SQLConstants.SQL_LIST_TRIGGERS, "SELECT TriggerName FROM DBC.TRIGGERSV WHERE DatabaseName = '{0}' AND TriggerName LIKE '{1}'"],
+            [SQLConstants.SQL_SHOW_TABLE, "SHOW TABLE {0}"],
+            [SQLConstants.SQL_SHOW_VIEW, "SHOW VIEW {0}"]
         ]
         self._add_sql_version()
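The new OTF templates are ordinary format strings. Filling SQL_TD_OTF_SNAPSHOT with a hypothetical table name and a TIMESTAMP literal (the kind produced by the new UtilFuncs._get_time_formatted_string further down this diff) gives:

    # Illustration only; 'mydb.sales_otf' is a hypothetical table name.
    template = "SELECT * FROM {0} FOR SNAPSHOT AS OF {1};"
    print(template.format("mydb.sales_otf", "TIMESTAMP'2025-06-01 12:00:00.123'"))
    # SELECT * FROM mydb.sales_otf FOR SNAPSHOT AS OF TIMESTAMP'2025-06-01 12:00:00.123';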
@@ -157,7 +163,7 @@ class SQLBundle:
         return sqlbundle._get_sql_query(SQLConstants.SQL_BASE_QUERY).format(name)

     @staticmethod
-    def _build_create_view(view_name, select_expression):
+    def _build_create_view(view_name, select_expression, lock_rows=False):
         """
         Builds a CREATE VIEW DDL statement.
         For Example,
@@ -166,6 +172,11 @@
         PARAMETERS:
             view_name - Viewname to be created
             select_expression - A SQL from which a view is to be created. (SELECT query)
+            lock_rows - When set to True, teradataml DataFrame locks the corresponding row(s)
+                        in underlying table(s) while accessing the data. Otherwise,
+                        teradataml DataFrame access the data without locking the rows.
+                        Default is False.
+

         RETURNS:
             A CREATE VIEW DDL statement
@@ -179,6 +190,8 @@
         """
         sqlbundle = SQLBundle()
         query = sqlbundle._get_sql_query(SQLConstants.SQL_CREATE_VIEW)
+        if lock_rows:
+            select_expression = "LOCKING ROW FOR ACCESS {}".format(select_expression)
         return query.format(view_name, select_expression)

     @staticmethod
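With lock_rows=True, the SELECT is prefixed before being substituted into the CREATE VIEW template. A sketch of the resulting DDL, assuming the usual CREATE VIEW {0} AS {1} form for SQL_CREATE_VIEW (that template's text is not shown in this diff):

    # Only the LOCKING ROW FOR ACCESS prefix is confirmed by the hunk above;
    # the CREATE VIEW template text is an assumption.
    select_expression = "LOCKING ROW FOR ACCESS {}".format("select * from sales")
    print("CREATE VIEW {0} AS {1}".format("my_view", select_expression))
    # CREATE VIEW my_view AS LOCKING ROW FOR ACCESS select * from sales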
@@ -635,7 +648,7 @@ class SQLBundle:
         return ddlstmt.format(tablename, columns_datatypes)

     @staticmethod
-    def _build_insert_into_table_records(tablename, columns):
+    def _build_insert_into_table_records(tablename, columns, column_names=None):
         """
         Builds a prepared statement with parameter markers for a table.
         This is an internal function.
@@ -643,6 +656,7 @@
         PARAMETERS:
             tablename - Table name to insert data.
             columns - The parameter markers for the prepared statement
+            column_names - The column names to be inserted.

         RETURNS:
             Returns a prepared statement.
@@ -652,11 +666,15 @@

         EXAMPLES:
             preprdstmt = SQLBundle.SQL_INSERT_INTO_TABLE_VALUES('mytab', '?, ?')
-
+            preprdstmt = SQLBundle.SQL_INSERT_INTO_TABLE_VALUES_WITH_COLUMN_NAMES('mytab', 'column1, column2', '?, ?')
+
         """
         sqlbundle = SQLBundle()
-
-
+        sqlkey = (SQLConstants.SQL_INSERT_INTO_TABLE_VALUES_WITH_COLUMN_NAMES
+                  if column_names else SQLConstants.SQL_INSERT_INTO_TABLE_VALUES)
+        query = sqlbundle._get_sql_query(sqlkey)
+        return (query.format(tablename, column_names, columns)
+                if column_names else query.format(tablename, columns))

     @staticmethod
     def _build_delete_all_rows_from_table(tablename):
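With column_names supplied, the builder switches to the column-aware template registered earlier in this file. A quick illustration; table and column names are hypothetical:

    # Template text comes from the SQL list hunk above.
    stmt = "insert into {0} ({1}) values({2})".format("mytab", "column1, column2", "?, ?")
    print(stmt)
    # insert into mytab (column1, column2) values(?, ?)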
teradataml/common/utils.py
CHANGED
@@ -13,11 +13,12 @@ by other classes which can be reused according to the need.
 Add all the common functions in this class like creating temporary table names, getting
 the datatypes etc.
 """
+import datetime
 import json
 import os
 import re
 import time
-import uuid
+import uuid, hashlib
 import warnings
 from functools import reduce
 from inspect import getsource
@@ -39,8 +40,9 @@ from teradatasqlalchemy.types import (BIGINT, BLOB, BYTE, BYTEINT, CHAR, CLOB,
 from teradataml import _version
 from teradataml.common import td_coltype_code_to_tdtype
 from teradataml.common.constants import (HTTPRequest, PTITableConstants,
-                                         PythonTypes,
-
+                                         PythonTypes, SQLConstants,
+                                         TeradataConstants,
+                                         TeradataReservedKeywords, TeradataTableKindConstants,
                                          TeradataTypes)
 from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.garbagecollector import GarbageCollector
@@ -280,26 +282,37 @@ class UtilFuncs():
         RAISES:

         EXAMPLES:
-            new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
-            new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
+            >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
+            >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
                                                                  table_type = TeradataConstants.TERADATA_VIEW)
-            new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
+            >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
                                                                  table_type = TeradataConstants.TERADATA_TABLE)
+            # Example when use_short_object_name is set to True
+            >>> from teradataml.options.configure import configure
+            >>> configure.use_short_object_name = True
+            >>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")

         Output:
             tdml_temp_table__1517501990393350 (or)
             tdqg.tdml_temp_table__1517501990393350 (or)
             tdml_temp_table__from_pandas_1517501990393350 (or)
-            tdqg.tdml_temp_table__from_pandas_1517501990393350
+            tdqg.tdml_temp_table__from_pandas_1517501990393350 (or)
+            ml__1749637109887272
         """
         # Number of seconds since Jan 1, 1970 00:00:00
         timestamp = time.time()
+        use_short_name = configure.use_short_object_name
         tabname = "ml_"
         random_string = "{}{}".format(floor(timestamp / 1000000),
                                       floor(timestamp % 1000000 * 1000000 +
                                             int(str(uuid.uuid4().fields[-1])[:10])))
-
+
+        # Append prefix only if use_short_object_name is False and prefix is provided.
+        if (not use_short_name) and (prefix is not None):
             tabname = "{}_{}".format(tabname, prefix)
+        # Append prefix "tdml" when use_short_object_name is True and random string is of length 15.
+        elif use_short_name and (len(random_string)==15):
+            tabname = "tdml"

         tabname = "{}_{}".format(tabname, random_string)
@@ -309,7 +322,8 @@
                 tabname = "\"{}\".\"{}\"".format(_get_user(), tabname)
             return tabname

-        if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+        if (not use_short_name) and (configure.temp_object_type == TeradataConstants.
+                                     TERADATA_VOLATILE_TABLE):
             from teradataml.context.context import _get_user
             return "\"{}\".\"{}_{}\"".format(_get_user(), "vt", tabname)
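With configure.use_short_object_name set, the prefix and the vt/volatile decoration are skipped, so generated names collapse to ml__<random> or tdml_<random>. A standalone sketch of the name construction from the hunks above (no Vantage session needed):

    import time
    import uuid
    from math import floor

    # Mirrors the random-string construction shown above.
    timestamp = time.time()
    random_string = "{}{}".format(floor(timestamp / 1000000),
                                  floor(timestamp % 1000000 * 1000000 +
                                        int(str(uuid.uuid4().fields[-1])[:10])))
    # Short-name path: "tdml" only when the random string is 15 characters long.
    tabname = "tdml" if len(random_string) == 15 else "ml_"
    print("{}_{}".format(tabname, random_string))  # e.g. ml__1749637109887272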
@@ -653,13 +667,17 @@ class UtilFuncs():

     @staticmethod
     @collect_queryband(queryband='CreateView')
-    def _create_view(view_name, query):
+    def _create_view(view_name, query, lock_rows=False):
         """
         Create a view from the given query.

         PARAMETERS:
             view_name - View name
             query - SQL query
+            lock_rows - When set to True, teradataml DataFrame locks the corresponding row(s)
+                        in underlying table(s) while accessing the data. Otherwise,
+                        teradataml DataFrame access the data without locking the rows.
+                        Default is False.

         RAISES

@@ -669,7 +687,8 @@
         EXAMPLES:
             UtilFuncs._create_view(view_name, "select * from table_name")
         """
-        crt_view = SQLBundle._build_create_view(view_name, query)
+
+        crt_view = SQLBundle._build_create_view(view_name, query, lock_rows)
         try:
             UtilFuncs._execute_ddl_statement(crt_view)
             return True
@@ -928,9 +947,13 @@ class UtilFuncs():
         EXAMPLES:
             UtilFuncs._get_help_vtablenames()
         """
-        vtables = UtilFuncs._execute_query(SQLBundle._build_help_volatile_table())
-        if vtables:
-
+        vtables = UtilFuncs._execute_query(SQLBundle._build_help_volatile_table(), fetchWarnings=True)
+        if vtables and vtables[0] and vtables[1]:
+            rows, columns = vtables
+            key = TeradataTableKindConstants.VOLATILE_TABLE_NAME.value
+            # Find the index of the column matching the table name
+            col_idx = columns.index(key)
+            return [row[col_idx].strip() for row in rows if row[col_idx]]
         return []

     @staticmethod
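With fetchWarnings=True, _execute_query returns a (rows, columns) pair, and the volatile table name is now located by column name rather than by position. A sketch on a fabricated result set; 'Table SQL Name' stands in for the value of TeradataTableKindConstants.VOLATILE_TABLE_NAME, which this diff does not show:

    # Fabricated rows/columns; real ones come from HELP VOLATILE TABLE.
    rows = [("vt_ml__123 ", "x"), ("vt_ml__456", "y"), (None, "z")]
    columns = ["Table SQL Name", "Misc"]
    col_idx = columns.index("Table SQL Name")
    print([row[col_idx].strip() for row in rows if row[col_idx]])
    # ['vt_ml__123', 'vt_ml__456']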
@@ -1310,7 +1333,8 @@

         return False

-
+    @staticmethod
+    def _is_non_ascii(col_lst):
         """
         Description:
         Check if the specified string in col_lst has non-ASCII characters in it.
@@ -1569,7 +1593,7 @@ class UtilFuncs():
         return UtilFuncs._teradata_quote_arg(args_list, quote, False)

     @staticmethod
-    def _get_metaexpr_using_columns(nodeid, column_info, with_engine=False, is_persist=False):
+    def _get_metaexpr_using_columns(nodeid, column_info, with_engine=False, is_persist=False, **kw):
         """
         This internal function takes in input node ID and column information in zipped lists format
         to return metaexpr with or without engine.
@@ -1610,7 +1634,7 @@
         ouptut_table = Table(db_table_name, meta,
                              *(Column(col_name, col_type) for col_name, col_type in column_info),
                              schema=db_schema)
-        return _MetaExpression(ouptut_table, is_persist=is_persist)
+        return _MetaExpression(ouptut_table, is_persist=is_persist, **kw)

     @staticmethod
     def _get_metaexpr_using_parent_metaexpr(nodeid, metaexpr):
@@ -1648,7 +1672,8 @@
             col_names.append(meta_cols[col_name].name)
             col_types.append(meta_cols[col_name].type)

-        return UtilFuncs._get_metaexpr_using_columns(nodeid, zip(col_names, col_types))
+        return UtilFuncs._get_metaexpr_using_columns(nodeid, zip(col_names, col_types),
+                                                     datalake=metaexpr.datalake)

     @staticmethod
     def _create_table_using_columns(table_name, columns_datatypes, pti_clause=None, storage=None):
@@ -2670,7 +2695,10 @@
             user_function_code = ""
             for func in user_functions:
                 # Get the source code of the user function.
-                func = getsource(func)
+                # Note that, checking for lambda function is required for teradatamlspk UDFs
+                # If the function is a lambda function, get the source code from __source__.
+                func = getsource(func) if func.__code__.co_name != "<lambda>" else func.__source__
+
                 # If the function have any extra space in the beginning remove it.
                 func = func.lstrip()
                 # Function can have decorator,e.g. udf as decorator, remove it.
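inspect.getsource cannot recover a clean definition for a lambda (its co_name is '<lambda>'), so teradatamlspk-style lambda UDFs carry their text in a __source__ attribute that this code falls back to. A small standalone sketch of the dispatch, run as a script; here __source__ is set by hand, whereas the diff assumes the UDF machinery attaches it:

    from inspect import getsource

    def double(x):
        return x * 2

    triple = lambda x: x * 3
    triple.__source__ = "triple = lambda x: x * 3"  # set by hand for this sketch

    for func in (double, triple):
        src = getsource(func) if func.__code__.co_name != "<lambda>" else func.__source__
        print(src.strip())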
@@ -2753,7 +2781,8 @@
                                           MessageCodes.PYTHON_VERSION_MISMATCH_OAF)
         else:
             from teradataml.context import context as tdmlctx
-            from teradataml.dbutils.dbutils import (db_python_version_diff,
+            from teradataml.dbutils.dbutils import (db_python_version_diff,
+                                                    set_session_param)
             set_session_param("searchuifdbpath",
                               UtilFuncs._get_dialect_quoted_name(tdmlctx._get_current_databasename()))
             if len(db_python_version_diff()) > 0:
@@ -2798,7 +2827,8 @@
         """

         # Check if OSML required packages are verified or not.
-        from teradataml.opensource._constants import
+        from teradataml.opensource._constants import \
+            _packages_verified_in_vantage
         _is_packages_verfied_in_vantage = _packages_verified_in_vantage.get(
             func, None)
         if _is_packages_verfied_in_vantage:
@@ -2855,7 +2885,8 @@

         else:
             # Check if the versions of Python packages are consistent between Vantage and local.
-            from teradataml.dbutils.dbutils import
+            from teradataml.dbutils.dbutils import \
+                _db_python_package_version_diff
             all_package_versions = _db_python_package_version_diff(packages, only_diff=False)
             package_difference = \
                 all_package_versions[all_package_versions.vantage != all_package_versions.local]
@@ -2922,6 +2953,163 @@ class UtilFuncs():
         tdp = preparer(td_dialect)
         return tdp.quote(object_name)

+    @staticmethod
+    def _get_hash_value(identifier):
+        """
+        DESCRIPTION:
+            Function to get the hash value of the identifier.
+
+        PARAMETERS:
+            identifier
+                Required Argument.
+                Specifies the identifier to be hashed.
+                Type: str
+
+        RAISES:
+            None
+
+        RETURNS:
+            Hash value of the identifier.
+
+        EXAMPLES:
+            UtilFuncs._get_hash_value(identifier = "tdml.alice")
+
+        OUTPUT:
+            a6c64c2c_58e9_5060_b811_00839ea493ed
+        """
+        # Generate a hash value using SHA-256
+        hash_object = hashlib.sha256(identifier.encode())
+        hash_hex = hash_object.hexdigest()
+
+        # Format the hash value to match the desired format
+        formatted_hash = f"{hash_hex[:8]}_{hash_hex[8:12]}_{hash_hex[12:16]}_{hash_hex[16:20]}_{hash_hex[20:32]}"
+
+        return formatted_hash
+
+    @staticmethod
+    def _get_http_status_phrases_description():
+        """
+        DESCRIPTION:
+            Function to get phrases and description for all HTTP status codes.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            dict
+
+        EXAMPLES:
+            >>> UtilFuncs._get_http_status_phrases_description()
+        """
+        from http import HTTPStatus
+        return {status.value: {"phrase": status.phrase, "description": status.description} \
+                for status in HTTPStatus}
+
+    @staticmethod
+    def _get_time_formatted_string(period):
+        """
+        DESCRIPTION:
+            Converts a string representing Period to the formatted TIMESTAMP/DATE string for snapshot queries.
+
+        PARAMETERS:
+            period:
+                Required Argument.
+                Specifies the period string to be converted.
+                Types: str
+
+        RETURNS:
+            The formatted TIMESTAMP/DATE string.
+
+        RAISES:
+            ValueError.
+
+        EXAMPLES:
+            >>> UtilFuncs._get_time_formatted_string('2025-06-01 12:00:00.123')
+        """
+        # Try to parse as datetime string
+        try:
+            for fmt in ["%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d"]:
+                try:
+                    dt = datetime.datetime.strptime(period, fmt)
+                    # If input had microseconds, preserve them
+                    if "%f" in fmt and "." in period:
+                        # Remove trailing zeros and dot if needed
+                        result = "TIMESTAMP'{}'".format(dt.strftime("%Y-%m-%d %H:%M:%S.%f").rstrip("0").rstrip("."))
+                    elif "%S" in fmt:
+                        result = "TIMESTAMP'{}'".format(dt.strftime("%Y-%m-%d %H:%M:%S"))
+                    else:
+                        result = "DATE'{}'".format(dt.strftime("%Y-%m-%d"))
+                    return result
+                except ValueError:
+                    continue
+            raise ValueError(f"Unrecognized period format: {period}")
+        except Exception as e:
+            raise ValueError(f"Could not convert period: {period}") from e
+
+    @staticmethod
+    def extract_table_names_from_query(query):
+        """
+        Extracts all table/view names from FROM, JOIN, and ON-AS clauses in a SQL query.
+        Handles nested queries and captures subqueries in ON (...), and ON <table> AS <alias>.
+        """
+        # Regex for FROM, JOIN, and ON ... AS ... clauses
+        # This is a simplification; for production, use a SQL parser.
+        table_names = set()
+        # FROM ... (possibly with nested SELECT)
+        for match in re.finditer(r'from\s+([^\s\(\)]+)', query, re.IGNORECASE):
+            table_names.add(match.group(1).strip())
+        # JOIN ... (possibly with nested SELECT)
+        for match in re.finditer(r'join\s+([^\s\(\)]+)', query, re.IGNORECASE):
+            table_names.add(match.group(1).strip())
+        # ON ( ... ) AS ... Nested Query in ON Clause.
+        for match in re.finditer(r'ON\s+\(([^)]+)\)\s+AS\s+["\']?\w+["\']?', query, re.IGNORECASE):
+            table_names.update(UtilFuncs.extract_table_names_from_query(match.group(1)))
+        # ON <table> AS <alias> (no parentheses)
+        for match in re.finditer(r'ON\s+(["\']?\w+["\']?(?:\.["\']?\w+["\']?)*)\s+AS\s+["\']?\w+["\']?', query, re.IGNORECASE):
+            table_names.add(match.group(1).strip())
+        return list(table_names)
+
+    @staticmethod
+    def _get_normalize_and_deduplicate_columns(columns):
+        """
+        DESCRIPTION:
+            Function that normalizes and deduplicates a list of column names.
+            This function processes the "columns", which can be a list of column names
+            as strings or ColumnExpression, or a single column name/ColumnExpression.
+            It extracts the column names, removes duplicates while preserving order,
+            and returns the resulting list of unique column names.
+
+        PARAMETERS:
+            columns:
+                Required Argument.
+                Specifies the column.
+                Types: str, ColumnExpression, list of str or ColumnExpression
+
+        RAISES:
+            None
+
+        RETURNS:
+            list
+
+        EXAMPLES:
+            >>> load_examples_data('dataframe', 'sales')
+            >>> df = DataFrame('sales')
+            >>> columns = [df.Jan, 'Jan', 'Feb', df.Feb, 'Mar']
+            >>> UtilFuncs._get_normalize_and_deduplicate_columns(columns)
+            ['Jan', 'Feb', 'Mar']
+
+        """
+        columns_list = []
+        seen = set()
+
+        for column in (columns if isinstance(columns, list) else [columns]):
+            name = column if isinstance(column, str) else column.name
+            if name not in seen:
+                seen.add(name)
+                columns_list.append(name)
+
+        return columns_list

+# Keeping at the end to avoid circular dependency
 from teradataml.common.aed_utils import AedUtils
 from teradataml.dbutils.filemgr import remove_file
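Two of the new helpers are easy to sanity-check standalone; this re-implements their cores rather than importing the private methods:

    import datetime
    import hashlib

    # _get_hash_value: SHA-256, regrouped as 8_4_4_4_12 hex digits.
    h = hashlib.sha256("tdml.alice".encode()).hexdigest()
    print(f"{h[:8]}_{h[8:12]}_{h[12:16]}_{h[16:20]}_{h[20:32]}")
    # a6c64c2c_58e9_5060_b811_00839ea493ed (per the docstring above)

    # _get_time_formatted_string: a microsecond input becomes a TIMESTAMP literal.
    dt = datetime.datetime.strptime("2025-06-01 12:00:00.123", "%Y-%m-%d %H:%M:%S.%f")
    print("TIMESTAMP'{}'".format(dt.strftime("%Y-%m-%d %H:%M:%S.%f").rstrip("0").rstrip(".")))
    # TIMESTAMP'2025-06-01 12:00:00.123'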
teradataml/context/aed_context.py
CHANGED

@@ -115,20 +115,26 @@ class AEDContext:
         """
         # Define extension to load AED library depending on the OS platform.
         os_type = platform.system()
+        arch_suffix = ""
         if (os_type == "Windows"):
-
-
+            extension = "dll"
+            lib_name = "aed"
         elif (os_type == "Darwin"):
-
-
+            extension = "dylib"
+            lib_name = "libaed"
         else:
-
-
+            # Linux case
+            extension = "so"
+            lib_name = "libaed"
+            # Only apply architecture suffix for aarch64/ARM64
+            machine = platform.uname().machine
+            if machine == "aarch64":
+                arch_suffix = "_" + machine
+
         # TODO:: Use logger when it is available.
-
-
-        "
-        "{0}_0_1.{1}".format(self.lib_name, self.extension))
+        from teradataml import _TDML_DIRECTORY
+        libPathName = os.path.join(os.sep, _TDML_DIRECTORY, "lib",
+                                   "{0}_0_1{1}.{2}".format(lib_name, arch_suffix, extension))
         try:
             elecommon = cdll.LoadLibrary(libPathName)
         except Exception as err:
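The net effect of the rewritten branch is a per-platform AED library file name with an optional architecture suffix, matching the four binaries listed in the file summary at the top of this diff:

    import platform

    # Standalone sketch of the resolution in the hunk above.
    os_type = platform.system()
    arch_suffix = ""
    if os_type == "Windows":
        lib_name, extension = "aed", "dll"
    elif os_type == "Darwin":
        lib_name, extension = "libaed", "dylib"
    else:
        lib_name, extension = "libaed", "so"
        if platform.uname().machine == "aarch64":
            arch_suffix = "_aarch64"
    print("{0}_0_1{1}.{2}".format(lib_name, arch_suffix, extension))
    # aed_0_1.dll, libaed_0_1.dylib, libaed_0_1.so, or libaed_0_1_aarch64.so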