teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +182 -13
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +8 -13
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +16 -1
- teradataml/analytics/utils.py +60 -1
- teradataml/automl/__init__.py +290 -106
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +29 -10
- teradataml/automl/data_transformation.py +11 -0
- teradataml/automl/feature_engineering.py +64 -4
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +1 -1
- teradataml/clients/auth_client.py +12 -8
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/common/constants.py +71 -26
- teradataml/common/exceptions.py +32 -0
- teradataml/common/messagecodes.py +28 -0
- teradataml/common/messages.py +13 -4
- teradataml/common/sqlbundle.py +3 -2
- teradataml/common/utils.py +345 -45
- teradataml/context/context.py +259 -93
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +1 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -1
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/url_data.csv +10 -9
- teradataml/dataframe/copy_to.py +38 -27
- teradataml/dataframe/data_transfer.py +61 -45
- teradataml/dataframe/dataframe.py +1110 -132
- teradataml/dataframe/dataframe_utils.py +73 -27
- teradataml/dataframe/functions.py +1070 -9
- teradataml/dataframe/sql.py +750 -959
- teradataml/dbutils/dbutils.py +33 -13
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/utils.py +4 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/_base.py +12 -157
- teradataml/options/configure.py +24 -9
- teradataml/scriptmgmt/UserEnv.py +317 -39
- teradataml/scriptmgmt/lls_utils.py +456 -135
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +897 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +406 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/store/__init__.py +1 -1
- teradataml/table_operators/Apply.py +16 -1
- teradataml/table_operators/Script.py +20 -1
- teradataml/table_operators/query_generator.py +4 -21
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/internal_buffer.py +22 -2
- teradataml/utils/utils.py +0 -1
- teradataml/utils/validators.py +318 -58
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
teradataml/common/utils.py
CHANGED
|
@@ -13,50 +13,50 @@ by other classes which can be reused according to the need.
|
|
|
13
13
|
Add all the common functions in this class like creating temporary table names, getting
|
|
14
14
|
the datatypes etc.
|
|
15
15
|
"""
|
|
16
|
-
|
|
16
|
+
import datetime
|
|
17
17
|
import json
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
import time
|
|
18
21
|
import uuid
|
|
22
|
+
import warnings
|
|
23
|
+
from functools import reduce
|
|
24
|
+
from inspect import getsource
|
|
19
25
|
from math import floor
|
|
20
|
-
|
|
21
|
-
import
|
|
22
|
-
import re, requests
|
|
26
|
+
|
|
27
|
+
import requests
|
|
23
28
|
import sqlalchemy
|
|
24
|
-
from pathlib import Path
|
|
25
29
|
from numpy import number
|
|
26
30
|
from sqlalchemy import Column, MetaData, Table
|
|
27
|
-
|
|
28
|
-
from
|
|
31
|
+
from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
|
|
32
|
+
from teradatasql import OperationalError
|
|
33
|
+
from teradatasqlalchemy.dialect import dialect as td_dialect
|
|
34
|
+
from teradatasqlalchemy.dialect import preparer
|
|
35
|
+
from teradatasqlalchemy.types import (BIGINT, BLOB, BYTE, BYTEINT, CHAR, CLOB,
|
|
36
|
+
DATE, DECIMAL, FLOAT, INTEGER, NUMBER,
|
|
37
|
+
SMALLINT, TIME, TIMESTAMP, VARBYTE,
|
|
38
|
+
VARCHAR, _TDType)
|
|
29
39
|
|
|
30
40
|
from teradataml import _version
|
|
31
|
-
from teradataml.
|
|
41
|
+
from teradataml.common import td_coltype_code_to_tdtype
|
|
42
|
+
from teradataml.common.constants import (HTTPRequest, PTITableConstants,
|
|
43
|
+
PythonTypes, SQLConstants,
|
|
44
|
+
TeradataConstants,
|
|
45
|
+
TeradataReservedKeywords,
|
|
46
|
+
TeradataTypes)
|
|
32
47
|
from teradataml.common.exceptions import TeradataMlException
|
|
33
|
-
from teradataml.common.
|
|
48
|
+
from teradataml.common.garbagecollector import GarbageCollector
|
|
34
49
|
from teradataml.common.messagecodes import MessageCodes
|
|
50
|
+
from teradataml.common.messages import Messages
|
|
35
51
|
from teradataml.common.sqlbundle import SQLBundle
|
|
36
|
-
from teradataml.common import
|
|
37
|
-
|
|
38
|
-
from teradataml.
|
|
39
|
-
from teradataml.common.garbagecollector import GarbageCollector
|
|
40
|
-
from teradataml.common.constants import TeradataConstants, PTITableConstants, \
|
|
41
|
-
TableOperatorConstants, HTTPRequest
|
|
42
|
-
from teradataml.common.warnings import VantageRuntimeWarning
|
|
52
|
+
from teradataml.common.warnings import (OneTimeUserWarning,
|
|
53
|
+
VantageRuntimeWarning)
|
|
54
|
+
from teradataml.context import context as tdmlctx
|
|
43
55
|
from teradataml.options.configure import configure
|
|
44
56
|
from teradataml.options.display import display
|
|
45
|
-
from teradataml.common.constants import TeradataReservedKeywords, TeradataConstants
|
|
46
|
-
|
|
47
|
-
from teradataml.utils.internal_buffer import _InternalBuffer
|
|
48
|
-
from teradatasqlalchemy.types import _TDType
|
|
49
|
-
from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT,
|
|
50
|
-
DECIMAL, FLOAT, NUMBER)
|
|
51
|
-
from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
|
|
52
|
-
from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
|
|
53
|
-
from teradatasqlalchemy.types import (CHAR, VARCHAR, CLOB)
|
|
54
|
-
from functools import reduce
|
|
55
|
-
import warnings
|
|
56
57
|
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
57
58
|
from teradataml.utils.utils import execute_sql
|
|
58
59
|
from teradataml.utils.validators import _Validators
|
|
59
|
-
from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
class UtilFuncs():
|
|
@@ -282,36 +282,48 @@ class UtilFuncs():
|
|
|
282
282
|
RAISES:
|
|
283
283
|
|
|
284
284
|
EXAMPLES:
|
|
285
|
-
new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
|
|
286
|
-
new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
|
|
285
|
+
>>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
|
|
286
|
+
>>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
|
|
287
287
|
table_type = TeradataConstants.TERADATA_VIEW)
|
|
288
|
-
new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
|
|
288
|
+
>>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas",
|
|
289
289
|
table_type = TeradataConstants.TERADATA_TABLE)
|
|
290
|
+
# Example when use_short_object_name is set to True
|
|
291
|
+
>>> from teradataml.options.configure import configure
|
|
292
|
+
>>> configure.use_short_object_name = True
|
|
293
|
+
>>> new_table_name = UtilFuncs._generate_temp_table_name(user='tdqg', prefix="from_pandas")
|
|
290
294
|
|
|
291
295
|
Output:
|
|
292
296
|
tdml_temp_table__1517501990393350 (or)
|
|
293
297
|
tdqg.tdml_temp_table__1517501990393350 (or)
|
|
294
298
|
tdml_temp_table__from_pandas_1517501990393350 (or)
|
|
295
|
-
tdqg.tdml_temp_table__from_pandas_1517501990393350
|
|
299
|
+
tdqg.tdml_temp_table__from_pandas_1517501990393350 (or)
|
|
300
|
+
ml__1749637109887272
|
|
296
301
|
"""
|
|
297
302
|
# Number of seconds since Jan 1, 1970 00:00:00
|
|
298
303
|
timestamp = time.time()
|
|
304
|
+
use_short_name = configure.use_short_object_name
|
|
299
305
|
tabname = "ml_"
|
|
300
306
|
random_string = "{}{}".format(floor(timestamp / 1000000),
|
|
301
307
|
floor(timestamp % 1000000 * 1000000 +
|
|
302
308
|
int(str(uuid.uuid4().fields[-1])[:10])))
|
|
303
|
-
|
|
309
|
+
|
|
310
|
+
# Append prefix only if use_short_object_name is False and prefix is provided.
|
|
311
|
+
if (not use_short_name) and (prefix is not None):
|
|
304
312
|
tabname = "{}_{}".format(tabname, prefix)
|
|
313
|
+
# Append prefix "tdml" when use_short_object_name is True and random string is of length 15.
|
|
314
|
+
elif use_short_name and (len(random_string)==15):
|
|
315
|
+
tabname = "tdml"
|
|
305
316
|
|
|
306
317
|
tabname = "{}_{}".format(tabname, random_string)
|
|
307
|
-
|
|
318
|
+
|
|
308
319
|
# ELE-6710 - Use database user associated with the current context for volatile tables.
|
|
309
320
|
if table_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
|
|
310
321
|
from teradataml.context.context import _get_user
|
|
311
322
|
tabname = "\"{}\".\"{}\"".format(_get_user(), tabname)
|
|
312
323
|
return tabname
|
|
313
324
|
|
|
314
|
-
if configure.temp_object_type == TeradataConstants.
|
|
325
|
+
if (not use_short_name) and (configure.temp_object_type == TeradataConstants.
|
|
326
|
+
TERADATA_VOLATILE_TABLE):
|
|
315
327
|
from teradataml.context.context import _get_user
|
|
316
328
|
return "\"{}\".\"{}_{}\"".format(_get_user(), "vt", tabname)
|
|
317
329
|
|
|
@@ -454,9 +466,10 @@ class UtilFuncs():
|
|
|
454
466
|
>>> def udf(a, b): return a + b
|
|
455
467
|
>>> func = UtilFuncs._serialize_and_encode(udf)
|
|
456
468
|
"""
|
|
457
|
-
from dill import dumps as dill_dumps
|
|
458
469
|
from base64 import b64encode as base64_b64encode
|
|
459
470
|
|
|
471
|
+
from dill import dumps as dill_dumps
|
|
472
|
+
|
|
460
473
|
return base64_b64encode(dill_dumps(obj, recurse=True))
|
|
461
474
|
|
|
462
475
|
@staticmethod
|
|
@@ -1275,7 +1288,7 @@ class UtilFuncs():
|
|
|
1275
1288
|
return UtilFuncs._teradata_quote_arg(keyword, "\"", False)
|
|
1276
1289
|
|
|
1277
1290
|
return keyword
|
|
1278
|
-
|
|
1291
|
+
|
|
1279
1292
|
def _contains_space(item):
|
|
1280
1293
|
"""
|
|
1281
1294
|
Check if the specified string in item has spaces or tabs in it.
|
|
@@ -1307,11 +1320,12 @@ class UtilFuncs():
|
|
|
1307
1320
|
# If the input is a list, check each element
|
|
1308
1321
|
if isinstance(item, list):
|
|
1309
1322
|
# Check each item in the list
|
|
1310
|
-
return any(UtilFuncs._contains_space(col) for col in item)
|
|
1323
|
+
return any(UtilFuncs._contains_space(col) for col in item)
|
|
1311
1324
|
|
|
1312
|
-
return False
|
|
1313
|
-
|
|
1314
|
-
|
|
1325
|
+
return False
|
|
1326
|
+
|
|
1327
|
+
@staticmethod
|
|
1328
|
+
def _is_non_ascii(col_lst):
|
|
1315
1329
|
"""
|
|
1316
1330
|
Description:
|
|
1317
1331
|
Check if the specified string in col_lst has non-ASCII characters in it.
|
|
@@ -2476,7 +2490,14 @@ class UtilFuncs():
|
|
|
2476
2490
|
# and determine the system type accordingly.
|
|
2477
2491
|
if tbl_operator is None:
|
|
2478
2492
|
from teradataml.context.context import _get_database_version
|
|
2479
|
-
|
|
2493
|
+
if int(_get_database_version().split(".")[0]) < 20:
|
|
2494
|
+
return False
|
|
2495
|
+
# If the database version is 20 or higher, check if the system is VCL or not.
|
|
2496
|
+
try:
|
|
2497
|
+
res = UtilFuncs._execute_query("SELECT 1 WHERE TD_GetSystemType('PRODUCT') = 'VCL';")
|
|
2498
|
+
return True if res else False
|
|
2499
|
+
except OperationalError:
|
|
2500
|
+
return True
|
|
2480
2501
|
|
|
2481
2502
|
return tbl_operator == "apply"
|
|
2482
2503
|
|
|
@@ -2503,7 +2524,6 @@ class UtilFuncs():
|
|
|
2503
2524
|
return "python" if UtilFuncs._is_lake() else \
|
|
2504
2525
|
'{}/bin/python3'.format(configure.indb_install_location)
|
|
2505
2526
|
|
|
2506
|
-
|
|
2507
2527
|
def _is_view(tablename):
|
|
2508
2528
|
"""
|
|
2509
2529
|
DESCRIPTION:
|
|
@@ -2589,7 +2609,7 @@ class UtilFuncs():
|
|
|
2589
2609
|
raise tdml_e
|
|
2590
2610
|
except Exception as exc:
|
|
2591
2611
|
raise exc
|
|
2592
|
-
|
|
2612
|
+
|
|
2593
2613
|
def _get_env_name(col=None):
|
|
2594
2614
|
"""
|
|
2595
2615
|
DESCRIPTION:
|
|
@@ -2665,7 +2685,10 @@ class UtilFuncs():
|
|
|
2665
2685
|
user_function_code = ""
|
|
2666
2686
|
for func in user_functions:
|
|
2667
2687
|
# Get the source code of the user function.
|
|
2668
|
-
|
|
2688
|
+
# Note that, checking for lambda function is required for teradatamlspk UDFs
|
|
2689
|
+
# If the function is a lambda function, get the source code from __source__.
|
|
2690
|
+
func = getsource(func) if func.__code__.co_name != "<lambda>" else func.__source__
|
|
2691
|
+
|
|
2669
2692
|
# If the function have any extra space in the beginning remove it.
|
|
2670
2693
|
func = func.lstrip()
|
|
2671
2694
|
# Function can have decorator,e.g. udf as decorator, remove it.
|
|
@@ -2704,6 +2727,283 @@ class UtilFuncs():
|
|
|
2704
2727
|
"""
|
|
2705
2728
|
return '"{}"."{}"'.format(schema_name, table_name)
|
|
2706
2729
|
|
|
2730
|
+
@staticmethod
def _check_python_version_diff(env=None):
    """
    DESCRIPTION:
        Internal function to check the Python version difference between
        Vantage and local.
        * When "env" is specified, the second "_"-separated token of the
          user environment's base environment name is compared with the
          local Python version (last "."-separated component dropped).
        * Otherwise db_python_version_diff() decides whether the Python
          versions on Vantage and local differ.

    PARAMETERS:
        env:
            Optional Argument.
            Specifies the user environment for Vantage Cloud Lake.
            Types: str, object of class UserEnv
            Default Value: None

    RAISES:
        TeradataMlException

    RETURNS:
        None.

    EXAMPLES:
        >>> UtilFuncs._check_python_version_diff(env)
    """
    if env:
        # Get the Python interpreter version of the user environment.
        from teradataml.scriptmgmt.lls_utils import list_user_envs
        from teradataml.scriptmgmt.UserEnv import UserEnv

        env_list = list_user_envs()
        user_env_name = env.env_name if isinstance(env, UserEnv) else env
        env_base_version = \
            env_list[env_list['env_name'] == user_env_name].base_env_name.values
        # Nothing to compare against when the user environment is not listed.
        # len() is used on purpose: ".values" is an array-like whose
        # truthiness is not a reliable emptiness check.
        if len(env_base_version) == 0:
            return
        # NOTE(review): assumes the base environment name looks like
        # "<prefix>_<python version>"; a name without "_" raises IndexError
        # here - confirm against the names returned by list_user_envs().
        python_env = env_base_version[0].split("_")[1]

        # Get the Python interpreter version of the local environment.
        from teradataml.context import context as tdmlctx
        python_local = tdmlctx.python_version_local.rsplit(".", 1)[0]

        # Raise an exception when the versions are not consistent between
        # Lake user environment and local.
        if python_env != python_local:
            raise TeradataMlException(
                Messages.get_message(MessageCodes.PYTHON_VERSION_MISMATCH_OAF,
                                     python_env, python_local),
                MessageCodes.PYTHON_VERSION_MISMATCH_OAF)
        return

    from teradataml.context import context as tdmlctx
    from teradataml.dbutils.dbutils import (db_python_version_diff,
                                            set_session_param)

    # The session parameter is set before the comparison runs, exactly as
    # the statements below are ordered.
    set_session_param("searchuifdbpath",
                      UtilFuncs._get_dialect_quoted_name(tdmlctx._get_current_databasename()))
    if len(db_python_version_diff()) > 0:
        # Raise exception when python versions don't match between Vantage and local.
        py_major_vantage_version = tdmlctx.python_version_vantage.rsplit(".", 1)[0]
        raise TeradataMlException(
            Messages.get_message(MessageCodes.PYTHON_VERSION_MISMATCH,
                                 tdmlctx.python_version_vantage, py_major_vantage_version),
            MessageCodes.PYTHON_VERSION_MISMATCH)
|
|
2784
|
+
|
|
2785
|
+
@staticmethod
def _check_package_version_diff(func, packages, env=None):
    """
    DESCRIPTION:
        Internal function to process packages differences between Vantage and local.
        Note:
            * Raises a warning if the versions of certain Python packages are
              not consistent between Vantage and local.
            * When a difference is found, a "requirements_<func>.txt" file is
              written to the GarbageCollector temp directory and its path is
              included in the warning.

    PARAMETERS:
        func:
            Required Argument.
            Specifies the function name.
            Types: str

        packages:
            Required Argument.
            Specifies the list of package names.
            Types: list of str

        env:
            Optional Argument.
            Specifies the user environment for Vantage Cloud Lake.
            Types: str, object of class UserEnv
            Default Value: None

    RETURNS:
        None

    RAISES:
        OneTimeUserWarning

    EXAMPLES:
        >>> UtilFuncs._check_package_version_diff("apply", ["dill"], env)
    """
    from teradataml.opensource._constants import \
        _packages_verified_in_vantage

    # Skip the check when this function was already flagged by an earlier call.
    if _packages_verified_in_vantage.get(func, None):
        return

    def _write_requirements(lines):
        # Write the given "name==version" lines to requirements_<func>.txt
        # in the GarbageCollector temp directory and return the file path.
        req_path = os.path.join(GarbageCollector._get_temp_dir_name(),
                                f"requirements_{func}.txt")
        with open(req_path, "w") as f:
            f.write("\n".join(lines))
        return req_path

    def _join_names(names):
        # Join names as "a', 'b' and 'c"; the caller adds the outer quotes.
        if len(names) == 1:
            return "{}".format(names[0])
        return "', '".join(names[:-1]) + "' and '" + names[-1]

    if env:
        from teradataml.scriptmgmt.lls_utils import get_env
        from teradataml.scriptmgmt.UserEnv import UserEnv
        env = env if isinstance(env, UserEnv) else get_env(env)
        env_pkg_df = env.libs
        pkgs_dict = dict(zip(env_pkg_df['name'], env_pkg_df['version']))

        from importlib.metadata import version
        # Only the packages whose versions differ are listed, each pinned to
        # the local version. version() raises PackageNotFoundError for a
        # package that is not installed locally.
        mismatched = []
        for pkg in packages:
            local_version = version(pkg)
            if pkgs_dict.get(pkg) != local_version:
                mismatched.append(f"{pkg}=={local_version}")

        if not mismatched:
            return

        req_file = _write_requirements(mismatched)
        names = _join_names(packages)

        if func == "apply":
            warning_msg = f"The version of certain Python packages are not consistent between Lake "\
                          f"user environment and local. Teradata recommends to maintain same version of '{names}' "\
                          f"between Lake user environment and local for '{func}'."
        else:
            # Remember that the warning was issued so later calls return early.
            _packages_verified_in_vantage[func] = True
            warning_msg = "The versions of certain Python packages are not consistent between "\
                          f"Lake user environment and local. OpenSourceML compares the versions of '{names}' "\
                          f"(and also matches the patterns of these packages) used by 'td_{func}'. "\
                          "Teradata recommends same versions for all the Python packages between Lake "\
                          "user environment and local."

        warning_msg += f"\nA requirements file listing all '{func}' " + \
                       f"related packages and their versions has been written to '{req_file}'. " + \
                       "Update the Lake user environment with the required packages.\n"

        # The message is complete here. It is deliberately not passed through
        # str.format(), so braces in the temp path cannot break the warning.
        warnings.warn(warning_msg, category=OneTimeUserWarning)
        return

    # Check if the versions of Python packages are consistent between Vantage and local.
    from teradataml.dbutils.dbutils import \
        _db_python_package_version_diff
    all_package_versions = _db_python_package_version_diff(packages, only_diff=False)
    package_difference = \
        all_package_versions[all_package_versions.vantage != all_package_versions.local]

    # No differences, nothing to report.
    if package_difference.shape[0] == 0:
        return

    # Unlike the user environment path, every record is written here, not
    # only the differing ones.
    # NOTE(review): rec[1]/rec[2] are positional fields of to_records();
    # presumably package name and a version column - confirm the column order.
    req_file = _write_requirements(
        [f"{rec[1]}=={rec[2]}" for rec in all_package_versions.to_records()])
    names = _join_names(packages)

    if func in ["map_row", "map_partition"]:
        warning_msg = f"The version of certain Python packages are not consistent between "\
                      "Vantage and local. User can identify them using db_python_package_version_diff() "\
                      f"function. Teradata recommends to maintain same version of '{names}' "\
                      f"between Vantage and local for '{func}'."
    else:
        # Remember that the warning was issued so later calls return early.
        _packages_verified_in_vantage[func] = True
        warning_msg = "The versions of certain Python packages are not consistent between "\
                      "Vantage and local. User can identify them using db_python_package_version_diff() "\
                      f"function. OpenSourceML compares the versions of '{names}' (and also matches the "\
                      f"patterns of these packages) used by 'td_{func}'. Teradata "\
                      "recommends to maintain same versions for all the Python packages between Vantage "\
                      "and local."

    # Tell the user where the requirements file was written.
    warning_msg += f"\nA requirements file listing all '{func}' " + \
                   f"related packages and their versions has been written to '{req_file}'.\n"

    warnings.warn(warning_msg, category=OneTimeUserWarning)
|
|
2918
|
+
|
|
2919
|
+
@staticmethod
def _get_dialect_quoted_name(object_name):
    """
    DESCRIPTION:
        Quotes a SQL identifier by delegating to the quote() method of
        teradatasqlalchemy's identifier preparer.

    PARAMETERS:
        object_name
            Required Argument.
            Specifies the name of the SQL identifier to be quoted.
            Type: str

    RAISES:
        None

    RETURNS:
        Quoted object name.

    EXAMPLES:
        >>> UtilFuncs._get_dialect_quoted_name(object_name = "tdml.alice")
        '"tdml.alice"'
    """
    # A preparer is built on every call and used once for this identifier.
    return preparer(td_dialect).quote(object_name)
|
|
2945
|
+
|
|
2946
|
+
@staticmethod
|
|
2947
|
+
def _get_http_status_phrases_description():
|
|
2948
|
+
"""
|
|
2949
|
+
DESCRIPTION:
|
|
2950
|
+
Function to get phrases and description for all HTTP status codes.
|
|
2951
|
+
|
|
2952
|
+
PARAMETERS:
|
|
2953
|
+
None
|
|
2954
|
+
|
|
2955
|
+
RETURNS:
|
|
2956
|
+
dict
|
|
2957
|
+
|
|
2958
|
+
EXAMPLES:
|
|
2959
|
+
>>> UtilFuncs._get_http_status_phrases_description()
|
|
2960
|
+
"""
|
|
2961
|
+
from http import HTTPStatus
|
|
2962
|
+
return {status.value: {"phrase": status.phrase, "description": status.description} \
|
|
2963
|
+
for status in HTTPStatus}
|
|
2707
2964
|
|
|
2965
|
+
@staticmethod
|
|
2966
|
+
def _get_time_formatted_string(period):
|
|
2967
|
+
"""
|
|
2968
|
+
DESCRIPTION:
|
|
2969
|
+
Converts a string representing Period to the formatted TIMESTAMP/DATE string for snapshot queries.
|
|
2970
|
+
|
|
2971
|
+
PARAMETERS:
|
|
2972
|
+
period:
|
|
2973
|
+
Required Argument.
|
|
2974
|
+
Specifies the period string to be converted.
|
|
2975
|
+
Types: str
|
|
2976
|
+
|
|
2977
|
+
RETURNS:
|
|
2978
|
+
The formatted TIMESTAMP/DATE string.
|
|
2979
|
+
|
|
2980
|
+
RAISES:
|
|
2981
|
+
ValueError.
|
|
2982
|
+
|
|
2983
|
+
EXAMPLES:
|
|
2984
|
+
>>> UtilFuncs._get_time_formatted_string('2025-06-01 12:00:00.123')
|
|
2985
|
+
"""
|
|
2986
|
+
# Try to parse as datetime string
|
|
2987
|
+
try:
|
|
2988
|
+
for fmt in ["%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d"]:
|
|
2989
|
+
try:
|
|
2990
|
+
dt = datetime.datetime.strptime(period, fmt)
|
|
2991
|
+
# If input had microseconds, preserve them
|
|
2992
|
+
if "%f" in fmt and "." in period:
|
|
2993
|
+
# Remove trailing zeros and dot if needed
|
|
2994
|
+
result = "TIMESTAMP'{}'".format(dt.strftime("%Y-%m-%d %H:%M:%S.%f").rstrip("0").rstrip("."))
|
|
2995
|
+
elif "%S" in fmt:
|
|
2996
|
+
result = "TIMESTAMP'{}'".format(dt.strftime("%Y-%m-%d %H:%M:%S"))
|
|
2997
|
+
else:
|
|
2998
|
+
result = "DATE'{}'".format(dt.strftime("%Y-%m-%d"))
|
|
2999
|
+
return result
|
|
3000
|
+
except ValueError:
|
|
3001
|
+
continue
|
|
3002
|
+
raise ValueError(f"Unrecognized period format: {period}")
|
|
3003
|
+
except Exception as e:
|
|
3004
|
+
raise ValueError(f"Could not convert period: {period}") from e
|
|
3005
|
+
|
|
3006
|
+
|
|
3007
|
+
# Keeping at the end to avoid circular dependency
|
|
2708
3008
|
from teradataml.common.aed_utils import AedUtils
|
|
2709
3009
|
from teradataml.dbutils.filemgr import remove_file
|