teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (107)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +86 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +7 -12
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +15 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +2 -2
  17. teradataml/common/constants.py +61 -26
  18. teradataml/common/messagecodes.py +2 -1
  19. teradataml/common/messages.py +5 -4
  20. teradataml/common/utils.py +255 -37
  21. teradataml/context/context.py +225 -87
  22. teradataml/data/apriori_example.json +22 -0
  23. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  24. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  25. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  26. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  27. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  29. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  30. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  31. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  32. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  34. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  35. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  37. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  38. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  39. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  40. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  41. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  42. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  43. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  47. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  48. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  49. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  51. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  53. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  54. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  55. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  56. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  57. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  58. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  59. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  60. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  61. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  62. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  63. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  65. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  66. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  67. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  68. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  69. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  70. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  71. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  72. teradataml/data/ner_dict.csv +8 -0
  73. teradataml/data/ner_input_eng.csv +7 -0
  74. teradataml/data/ner_rule.csv +5 -0
  75. teradataml/data/pos_input.csv +40 -0
  76. teradataml/data/tdnerextractor_example.json +14 -0
  77. teradataml/data/teradataml_example.json +13 -0
  78. teradataml/data/textmorph_example.json +5 -0
  79. teradataml/data/to_num_data.csv +4 -0
  80. teradataml/data/tochar_data.csv +5 -0
  81. teradataml/data/trans_dense.csv +16 -0
  82. teradataml/data/trans_sparse.csv +55 -0
  83. teradataml/dataframe/copy_to.py +37 -26
  84. teradataml/dataframe/data_transfer.py +61 -45
  85. teradataml/dataframe/dataframe.py +130 -50
  86. teradataml/dataframe/dataframe_utils.py +15 -2
  87. teradataml/dataframe/functions.py +109 -9
  88. teradataml/dataframe/sql.py +328 -76
  89. teradataml/dbutils/dbutils.py +33 -13
  90. teradataml/dbutils/filemgr.py +14 -10
  91. teradataml/lib/aed_0_1.dll +0 -0
  92. teradataml/opensource/_base.py +6 -157
  93. teradataml/options/configure.py +4 -5
  94. teradataml/scriptmgmt/UserEnv.py +305 -38
  95. teradataml/scriptmgmt/lls_utils.py +376 -130
  96. teradataml/store/__init__.py +1 -1
  97. teradataml/table_operators/Apply.py +16 -1
  98. teradataml/table_operators/Script.py +20 -1
  99. teradataml/table_operators/table_operator_util.py +58 -9
  100. teradataml/utils/dtypes.py +2 -1
  101. teradataml/utils/internal_buffer.py +22 -2
  102. teradataml/utils/validators.py +313 -57
  103. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +89 -14
  104. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +107 -77
  105. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  106. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  107. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -775,7 +775,7 @@ class _ModelTraining:
775
775
 
776
776
  # Defining training data
777
777
  data_types = ['lasso', 'rfe', 'pca']
778
- trainng_datas = tuple(DataFrame(self.table_name_mapping[f'{data_type}_train']) for data_type in data_types)
778
+ trainng_datas = tuple(DataFrame(self.data_mapping[f'{data_type}_train']) for data_type in data_types)
779
779
 
780
780
  if self.task_type == "Classification":
781
781
  response_values = trainng_datas[0].get(self.target_column).drop_duplicate().get_values().flatten().tolist()
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Unpublished work.
3
- Copyright (c) 2024 by Teradata Corporation. All rights reserved.
3
+ Copyright (c) 2025 by Teradata Corporation. All rights reserved.
4
4
  TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
5
5
  Primary Owner: gouri.patwardhan@teradata.com
6
6
  Secondary Owner: Pradeep.Garre@teradata.com
@@ -67,7 +67,7 @@ class _AuthWorkflow:
67
67
  dict
68
68
  """
69
69
  # Extract the pem file name without extension.
70
- kid = pathlib.Path(self.state.get('pem_file')).stem
70
+ kid = pathlib.Path(self.state.get('pem_file')).stem if not self.state.get('kid') else self.state['kid']
71
71
  header = {
72
72
  "alg": "RS256",
73
73
  "kid": kid,
@@ -20,7 +20,7 @@ from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
20
20
  from teradatasqlalchemy import (CHAR, CLOB)
21
21
  from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
22
22
  from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
23
- INTERVAL_DAY,INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
23
+ INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
24
24
  INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
25
25
  INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
26
26
  INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND,
@@ -82,7 +82,7 @@ class TeradataConstants(Enum):
82
82
  # Order of operators
83
83
  # shouldn't be changed. This is the order in which join condition is tested - first, operators
84
84
  # with two characters and then the operators with single character.
85
- SUPPORTED_ENGINES = {"ENGINE_SQL" : {"name" : "sqle", "file" : "sqlengine_alias_definitions"}}
85
+ SUPPORTED_ENGINES = {"ENGINE_SQL": {"name": "sqle", "file": "sqlengine_alias_definitions"}}
86
86
  SUPPORTED_VANTAGE_VERSIONS = {"vantage1.0": "v1.0", "vantage1.1": "v1.1",
87
87
  "vantage1.3": "v1.3", "vantage2.0": "v1.1"}
88
88
  RANGE_SEPARATORS = [":"]
@@ -90,7 +90,7 @@ class TeradataConstants(Enum):
90
90
 
91
91
  class AEDConstants(Enum):
92
92
  AED_NODE_NOT_EXECUTED = 0
93
- AED_NODE_EXECUTED = 1
93
+ AED_NODE_EXECUTED = 1
94
94
  AED_DB_OBJECT_NAME_BUFFER_SIZE = 128
95
95
  AED_NODE_TYPE_BUFFER_SIZE = 32
96
96
  AED_ASSIGN_DROP_EXISITING_COLUMNS = "Y"
@@ -148,7 +148,7 @@ class TeradataTableKindConstants(Enum):
148
148
  TABLE = "table"
149
149
  VIEW = "view"
150
150
  TEMP = "temp"
151
- ALL = "all"
151
+ ALL = "all"
152
152
  ML_PATTERN = "ml_%"
153
153
  VOLATILE_TABLE_NAME = 'Table Name'
154
154
  REGULAR_TABLE_NAME = 'TableName'
@@ -220,7 +220,7 @@ class ModelCatalogingConstants(Enum):
220
220
 
221
221
 
222
222
  class CopyToConstants(Enum):
223
- DBAPI_BATCHSIZE = 16383
223
+ DBAPI_BATCHSIZE = 16383
224
224
 
225
225
 
226
226
  class PTITableConstants(Enum):
@@ -377,34 +377,34 @@ class GeospatialConstants(Enum):
377
377
  "buffer": lambda x: "ST_Buffer",
378
378
  "contains": lambda x: "ST_Contains",
379
379
  "crosses": lambda x: "ST_Crosses",
380
- "difference": lambda x: "ST_Difference", # M
380
+ "difference": lambda x: "ST_Difference", # M
381
381
  "disjoint": lambda x: "ST_Disjoint",
382
- "distance": lambda x: "ST_Distance", # M
383
- "distance_3D": lambda x: "ST_3DDistance", # M
382
+ "distance": lambda x: "ST_Distance", # M
383
+ "distance_3D": lambda x: "ST_3DDistance", # M
384
384
  "geom_equals": lambda x: "ST_Equals",
385
385
  "intersection": lambda x: "ST_Intersection",
386
- #"intersect": lambda x: "ST_Intersect", # M
386
+ # "intersect": lambda x: "ST_Intersect", # M
387
387
  "make_2D": lambda x: "Make_2D",
388
388
  "overlaps": lambda x: "ST_Overlaps",
389
389
  "relates": lambda x: "ST_Relate",
390
390
  "simplify": lambda x: "SimplifyPreserveTopology",
391
- "sym_difference": lambda x: "ST_SymDifference", # M
391
+ "sym_difference": lambda x: "ST_SymDifference", # M
392
392
  "touches": lambda x: "ST_Touches",
393
393
  "transform": lambda x: "ST_Transform",
394
394
  "union": lambda x: "ST_Union",
395
395
  "within": lambda x: "ST_Within",
396
- "wkb_geom_to_sql": lambda x: "ST_WKBToSQL", # M
397
- "wkt_geom_to_sql": lambda x: "ST_WKTToSQL", # M
396
+ "wkb_geom_to_sql": lambda x: "ST_WKBToSQL", # M
397
+ "wkt_geom_to_sql": lambda x: "ST_WKTToSQL", # M
398
398
  "set_srid": lambda x: "ST_SRID",
399
399
 
400
400
  ## *** Geometry Type ST_Point Methods *** ##
401
401
  "set_x": lambda x: "ST_X",
402
402
  "set_y": lambda x: "ST_Y",
403
403
  "set_z": lambda x: "ST_Z",
404
- "spherical_buffer": lambda x: "ST_SphericalBufferMBR", # M
405
- "spherical_distance": lambda x: "ST_SphericalDistance", # M
406
- "spheroidal_buffer": lambda x: "ST_SpheroidalBufferMBR", # M
407
- "spheroidal_distance": lambda x: "ST_SpheroidalDistance", # M
404
+ "spherical_buffer": lambda x: "ST_SphericalBufferMBR", # M
405
+ "spherical_distance": lambda x: "ST_SphericalDistance", # M
406
+ "spheroidal_buffer": lambda x: "ST_SpheroidalBufferMBR", # M
407
+ "spheroidal_distance": lambda x: "ST_SpheroidalDistance", # M
408
408
 
409
409
  ## *** Geometry Type ST_LineString Methods *** ##
410
410
  "line_interpolate_point": lambda x: "ST_Line_Interpolate_Point",
@@ -509,9 +509,18 @@ class TableOperatorConstants(Enum):
509
509
  "delimiter(' ') " \
510
510
  "returns('package VARCHAR({2}), " \
511
511
  "version VARCHAR({2})'))"
512
-
512
+
513
513
  SCRIPT_LIST_FILES_QUERY = "SELECT DISTINCT * FROM SCRIPT (SCRIPT_COMMAND " \
514
- "('ls ./{}') RETURNS ('Files VARCHAR({})'))"
514
+ "('ls ./{}') RETURNS ('Files VARCHAR({})'))"
515
+
516
+
517
+ # OpenBlas by default is multi-threaded, needs to be set to single-threaded.
518
+ OPENBLAS_NUM_THREADS = "OPENBLAS_NUM_THREADS=1"
519
+
520
+ # Query to create a DataFrame with a range of numbers.
521
+ RANGE_QUERY = "WITH RECURSIVE NumberSeries (id) AS (SELECT id AS id from {0} "\
522
+ "UNION ALL SELECT id {3} {1} FROM NumberSeries WHERE id {3} {1} {4} {2}) "\
523
+ "SELECT id FROM NumberSeries;"
515
524
 
516
525
  class ValibConstants(Enum):
517
526
  # A dictionary that maps teradataml name of the exposed VALIB function name
@@ -1355,7 +1364,7 @@ class SQLFunctionConstants(Enum):
1355
1364
  "regexp_replace": "REGEXP_REPLACE",
1356
1365
  "regexp_similar": "REGEXP_SIMILAR",
1357
1366
  "regexp_substr": "REGEXP_SUBSTR",
1358
-
1367
+
1359
1368
  # DateTime Functions
1360
1369
  'week_begin': 'td_week_begin',
1361
1370
  'week_start': 'td_week_begin',
@@ -1426,6 +1435,7 @@ class TeradataReservedKeywords(Enum):
1426
1435
  "TYPE"
1427
1436
  ]
1428
1437
 
1438
+
1429
1439
  class TeradataAnalyticFunctionTypes(Enum):
1430
1440
  SQLE = "FASTPATH"
1431
1441
  UAF = "UAF"
@@ -1435,15 +1445,15 @@ class TeradataAnalyticFunctionTypes(Enum):
1435
1445
 
1436
1446
 
1437
1447
  class TeradataAnalyticFunctionInfo(Enum):
1438
-
1439
- FASTPATH = {"func_type": "sqle", "lowest_version": "16.20", "display_function_type_name" :"SQLE"}
1448
+ FASTPATH = {"func_type": "sqle", "lowest_version": "16.20", "display_function_type_name": "SQLE"}
1440
1449
  UAF = {"func_type": "uaf", "lowest_version": "17.20", "display_function_type_name": "UAF",
1441
- "metadata_class" : "_AnlyFuncMetadataUAF"}
1450
+ "metadata_class": "_AnlyFuncMetadataUAF"}
1442
1451
  TABLE_OPERATOR = {"func_type": "tableoperator", "lowest_version": "17.00 ",
1443
- "display_function_type_name" :"TABLE OPERATOR"}
1452
+ "display_function_type_name": "TABLE OPERATOR"}
1444
1453
  BYOM = {"func_type": "byom", "lowest_version": None, "display_function_type_name": "BYOM"}
1445
1454
  STORED_PROCEDURE = {"func_type": "storedprocedure", "lowest_version": "17.20", "display_function_type_name": "UAF",
1446
- "metadata_class" : "_AnlyFuncMetadataUAF"}
1455
+ "metadata_class": "_AnlyFuncMetadataUAF"}
1456
+
1447
1457
 
1448
1458
  class TeradataUAFSpecificArgs(Enum):
1449
1459
  INPUT_MODE = "input_mode"
@@ -1451,9 +1461,11 @@ class TeradataUAFSpecificArgs(Enum):
1451
1461
  OUTPUT_FMT_INDEX = "output_fmt_index"
1452
1462
  OUTPUT_FMT_INDEX_STYLE = "output_fmt_index_style"
1453
1463
 
1464
+
1454
1465
  class Query(Enum):
1455
1466
  VANTAGE_VERSION = "SELECT InfoData FROM DBC.DBCInfoV where InfoKey = 'VERSION'"
1456
1467
 
1468
+
1457
1469
  class DriverEscapeFunctions(Enum):
1458
1470
  # Holds variables for the teradatasql driver escape functions to be used
1459
1471
  NATIVE_SQL = "{fn teradata_nativesql}"
@@ -1494,6 +1506,13 @@ class AsyncStatusColumns(Enum):
1494
1506
  ADDITIONAL_DETAILS = "Additional Details"
1495
1507
 
1496
1508
 
1509
+ class AsyncOpStatus(Enum):
1510
+ # Holds valid status for asynchronous operations in UES.
1511
+ FILE_INSTALLED = "File Installed"
1512
+ ERRED = "Errored"
1513
+ FINISHED = "Finished"
1514
+
1515
+
1497
1516
  class CloudProvider(Enum):
1498
1517
  # Holds variable names for Cloud Providers.
1499
1518
  AWS = "AWS"
@@ -1503,7 +1522,8 @@ class CloudProvider(Enum):
1503
1522
  X_MS_VERSION = "2019-12-12"
1504
1523
  X_MS_BLOB_TYPE = "BlockBlob"
1505
1524
 
1506
- class SessionParamsSQL:
1525
+
1526
+ class SessionParamsSQL(Enum):
1507
1527
  # Holds the SQL Statements for Session params.
1508
1528
  TIMEZONE = "SET TIME ZONE {}"
1509
1529
  ACCOUNT = "SET SESSION ACCOUNT = '{}' FOR {}"
@@ -1523,7 +1543,8 @@ class SessionParamsSQL:
1523
1543
  QUERY_BAND = "SET QUERY_BAND = {} FOR {}"
1524
1544
  UDFSEARCHPATH = "SET SESSION UDFSEARCHPATH = {} FOR FUNCTION = {}"
1525
1545
 
1526
- class SessionParamsPythonNames:
1546
+
1547
+ class SessionParamsPythonNames(Enum):
1527
1548
  # Holds the SQL Statements for Session params.
1528
1549
  TIMEZONE = "Session Time Zone"
1529
1550
  ACCOUNT = "Account Name"
@@ -1532,3 +1553,17 @@ class SessionParamsPythonNames:
1532
1553
  DATABASE = "Current DataBase"
1533
1554
  DATEFORM = 'Current DateForm'
1534
1555
 
1556
+
1557
+ class AutoMLConstants(Enum):
1558
+ # List stores feature selection methods
1559
+ FEATURE_SELECTION_MTDS = ["lasso", "rfe", "pca"]
1560
+
1561
+
1562
+ class AuthMechs(Enum):
1563
+ """
1564
+ Enum to hold permitted values for authentication mechanism.
1565
+ """
1566
+ OAUTH = "OAuth"
1567
+ JWT = "JWT"
1568
+ PAT = "PAT"
1569
+ BASIC = "BASIC"
@@ -125,6 +125,7 @@ class ErrorInfoCodes(Enum):
125
125
  SPECIFY_AT_LEAST_ONE_ARG = 'TDML_2037'
126
126
  CANNOT_USE_TOGETHER_WITH = 'TDML_2042'
127
127
  TABLE_DOES_NOT_EXIST = 'TDML_2046'
128
+ DEPENDENT_METHOD = 'TDML_2113'
128
129
 
129
130
  # Reserved for Generic Error Messages: 2121 - 2199
130
131
  RESERVED_KEYWORD = 'TDML_2121'
@@ -440,4 +441,4 @@ class MessageCodes(Enum):
440
441
  CONNECTION_PARAMS = "Required connection parameters are missing. Connection parameters should either be " \
441
442
  "explicitly passed to function or specified using a configuration file, or setting up " \
442
443
  "the environment variables."
443
-
444
+ DEPENDENT_METHOD = "Method(s) {} must be called before calling '{}'."
@@ -195,7 +195,8 @@ class Messages():
195
195
  [ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND],
196
196
  [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE],
197
197
  [ErrorInfoCodes.SET_REQUIRED_PARAMS, MessageCodes.SET_REQUIRED_PARAMS],
198
- [ErrorInfoCodes.MISSING_ARGS, MessageCodes.CONNECTION_PARAMS]
198
+ [ErrorInfoCodes.MISSING_ARGS, MessageCodes.CONNECTION_PARAMS],
199
+ [ErrorInfoCodes.DEPENDENT_METHOD, MessageCodes.DEPENDENT_METHOD]
199
200
  ]
200
201
 
201
202
  @staticmethod
@@ -224,11 +225,11 @@ class Messages():
224
225
 
225
226
  """
226
227
  for msg in Messages.__messages:
227
- if msg[1] == messagecode :
228
+ if msg[1] == messagecode:
228
229
  message = "{}({}) {}".format(Messages.__standard_message, msg[0].value, msg[1].value)
229
230
  if len(variables) != 0:
230
231
  message = message.format(*variables)
231
- if len(kwargs) != 0 :
232
+ if len(kwargs) != 0:
232
233
  message = "{} {}".format(message, kwargs)
233
-
234
+ break
234
235
  return message
@@ -13,50 +13,48 @@ by other classes which can be reused according to the need.
13
13
  Add all the common functions in this class like creating temporary table names, getting
14
14
  the datatypes etc.
15
15
  """
16
- from inspect import getsource
17
16
  import json
17
+ import os
18
+ import re
19
+ import time
18
20
  import uuid
21
+ import warnings
22
+ from functools import reduce
23
+ from inspect import getsource
19
24
  from math import floor
20
- import os, itertools
21
- import time
22
- import re, requests
25
+
26
+ import requests
23
27
  import sqlalchemy
24
- from pathlib import Path
25
28
  from numpy import number
26
29
  from sqlalchemy import Column, MetaData, Table
27
-
28
- from teradataml.context.context import get_connection
30
+ from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
31
+ from teradatasql import OperationalError
32
+ from teradatasqlalchemy.dialect import dialect as td_dialect
33
+ from teradatasqlalchemy.dialect import preparer
34
+ from teradatasqlalchemy.types import (BIGINT, BLOB, BYTE, BYTEINT, CHAR, CLOB,
35
+ DATE, DECIMAL, FLOAT, INTEGER, NUMBER,
36
+ SMALLINT, TIME, TIMESTAMP, VARBYTE,
37
+ VARCHAR, _TDType)
29
38
 
30
39
  from teradataml import _version
31
- from teradataml.context import context as tdmlctx
40
+ from teradataml.common import td_coltype_code_to_tdtype
41
+ from teradataml.common.constants import (HTTPRequest, PTITableConstants,
42
+ PythonTypes, TeradataConstants,
43
+ TeradataReservedKeywords,
44
+ TeradataTypes)
32
45
  from teradataml.common.exceptions import TeradataMlException
33
- from teradataml.common.messages import Messages
46
+ from teradataml.common.garbagecollector import GarbageCollector
34
47
  from teradataml.common.messagecodes import MessageCodes
48
+ from teradataml.common.messages import Messages
35
49
  from teradataml.common.sqlbundle import SQLBundle
36
- from teradataml.common import td_coltype_code_to_tdtype
37
- from teradataml.common.constants import PythonTypes
38
- from teradataml.common.constants import TeradataTypes
39
- from teradataml.common.garbagecollector import GarbageCollector
40
- from teradataml.common.constants import TeradataConstants, PTITableConstants, \
41
- TableOperatorConstants, HTTPRequest
42
- from teradataml.common.warnings import VantageRuntimeWarning
50
+ from teradataml.common.warnings import (OneTimeUserWarning,
51
+ VantageRuntimeWarning)
52
+ from teradataml.context import context as tdmlctx
43
53
  from teradataml.options.configure import configure
44
54
  from teradataml.options.display import display
45
- from teradataml.common.constants import TeradataReservedKeywords, TeradataConstants
46
-
47
- from teradataml.utils.internal_buffer import _InternalBuffer
48
- from teradatasqlalchemy.types import _TDType
49
- from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT,
50
- DECIMAL, FLOAT, NUMBER)
51
- from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
52
- from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
53
- from teradatasqlalchemy.types import (CHAR, VARCHAR, CLOB)
54
- from functools import reduce
55
- import warnings
56
55
  from teradataml.telemetry_utils.queryband import collect_queryband
57
56
  from teradataml.utils.utils import execute_sql
58
57
  from teradataml.utils.validators import _Validators
59
- from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
60
58
 
61
59
 
62
60
  class UtilFuncs():
@@ -304,7 +302,7 @@ class UtilFuncs():
304
302
  tabname = "{}_{}".format(tabname, prefix)
305
303
 
306
304
  tabname = "{}_{}".format(tabname, random_string)
307
-
305
+
308
306
  # ELE-6710 - Use database user associated with the current context for volatile tables.
309
307
  if table_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
310
308
  from teradataml.context.context import _get_user
@@ -454,9 +452,10 @@ class UtilFuncs():
454
452
  >>> def udf(a, b): return a + b
455
453
  >>> func = UtilFuncs._serialize_and_encode(udf)
456
454
  """
457
- from dill import dumps as dill_dumps
458
455
  from base64 import b64encode as base64_b64encode
459
456
 
457
+ from dill import dumps as dill_dumps
458
+
460
459
  return base64_b64encode(dill_dumps(obj, recurse=True))
461
460
 
462
461
  @staticmethod
@@ -1275,7 +1274,7 @@ class UtilFuncs():
1275
1274
  return UtilFuncs._teradata_quote_arg(keyword, "\"", False)
1276
1275
 
1277
1276
  return keyword
1278
-
1277
+
1279
1278
  def _contains_space(item):
1280
1279
  """
1281
1280
  Check if the specified string in item has spaces or tabs in it.
@@ -1307,10 +1306,10 @@ class UtilFuncs():
1307
1306
  # If the input is a list, check each element
1308
1307
  if isinstance(item, list):
1309
1308
  # Check each item in the list
1310
- return any(UtilFuncs._contains_space(col) for col in item)
1309
+ return any(UtilFuncs._contains_space(col) for col in item)
1310
+
1311
+ return False
1311
1312
 
1312
- return False
1313
-
1314
1313
  def _is_ascii(col_lst):
1315
1314
  """
1316
1315
  Description:
@@ -2476,7 +2475,14 @@ class UtilFuncs():
2476
2475
  # and determine the system type accordingly.
2477
2476
  if tbl_operator is None:
2478
2477
  from teradataml.context.context import _get_database_version
2479
- return int(_get_database_version().split(".")[0]) >= 20
2478
+ if int(_get_database_version().split(".")[0]) < 20:
2479
+ return False
2480
+ # If the database version is 20 or higher, check if the system is VCL or not.
2481
+ try:
2482
+ res = UtilFuncs._execute_query("SELECT 1 WHERE TD_GetSystemType('PRODUCT') = 'VCL';")
2483
+ return True if res else False
2484
+ except OperationalError:
2485
+ return True
2480
2486
 
2481
2487
  return tbl_operator == "apply"
2482
2488
 
@@ -2503,7 +2509,6 @@ class UtilFuncs():
2503
2509
  return "python" if UtilFuncs._is_lake() else \
2504
2510
  '{}/bin/python3'.format(configure.indb_install_location)
2505
2511
 
2506
-
2507
2512
  def _is_view(tablename):
2508
2513
  """
2509
2514
  DESCRIPTION:
@@ -2589,7 +2594,7 @@ class UtilFuncs():
2589
2594
  raise tdml_e
2590
2595
  except Exception as exc:
2591
2596
  raise exc
2592
-
2597
+
2593
2598
  def _get_env_name(col=None):
2594
2599
  """
2595
2600
  DESCRIPTION:
@@ -2704,6 +2709,219 @@ class UtilFuncs():
2704
2709
  """
2705
2710
  return '"{}"."{}"'.format(schema_name, table_name)
2706
2711
 
2712
+ def _check_python_version_diff(env = None):
2713
+ """
2714
+ DESCRIPTION:
2715
+ Internal function to check the python version difference between Vantage and local.
2716
+
2717
+ PARAMETERS:
2718
+ env:
2719
+ Optional Argument.
2720
+ Specifies the user environment for Vantage Cloud Lake.
2721
+ Types: str, object of class UserEnv
2722
+ Default Value: None
2723
+
2724
+ RAISES:
2725
+ TeradataMlException
2726
+
2727
+ RETURNS:
2728
+ None.
2729
+
2730
+ EXAMPLES:
2731
+ >>> self._check_python_version_diff(env)
2732
+ """
2733
+ if env:
2734
+ # Get the Python interpreter version of the user environment.
2735
+ from teradataml.scriptmgmt.lls_utils import list_user_envs
2736
+ from teradataml.scriptmgmt.UserEnv import UserEnv
2737
+ env_list = list_user_envs()
2738
+ user_env_name = env.env_name if isinstance(env, UserEnv) else env
2739
+ env_base_version = env_list[env_list['env_name'] == user_env_name].base_env_name.values
2740
+ # Check if the user environment is not found, then return.
2741
+ if len(env_base_version) == 0:
2742
+ return
2743
+ python_env = env_base_version[0].split("_")[1]
2744
+
2745
+ # Get the Python interpreter version of the local environment.
2746
+ from teradataml.context import context as tdmlctx
2747
+ python_local = tdmlctx.python_version_local.rsplit(".", 1)[0]
2748
+ # Check if the Python interpreter major versions are consistent between Lake user environment and local.
2749
+ # If not, raise an exception.
2750
+ if python_env != python_local:
2751
+ raise TeradataMlException(Messages.get_message(MessageCodes.PYTHON_VERSION_MISMATCH_OAF,
2752
+ python_env, python_local),
2753
+ MessageCodes.PYTHON_VERSION_MISMATCH_OAF)
2754
+ else:
2755
+ from teradataml.context import context as tdmlctx
2756
+ from teradataml.dbutils.dbutils import (db_python_version_diff, set_session_param)
2757
+ set_session_param("searchuifdbpath",
2758
+ UtilFuncs._get_dialect_quoted_name(tdmlctx._get_current_databasename()))
2759
+ if len(db_python_version_diff()) > 0:
2760
+ # Raise exception when python versions don't match between Vantage and local.
2761
+ py_major_vantage_version = tdmlctx.python_version_vantage.rsplit(".", 1)[0]
2762
+ raise TeradataMlException(Messages.get_message(MessageCodes.PYTHON_VERSION_MISMATCH,
2763
+ tdmlctx.python_version_vantage, py_major_vantage_version),
2764
+ MessageCodes.PYTHON_VERSION_MISMATCH)
2765
+
2766
+ def _check_package_version_diff(func, packages, env=None):
2767
+ """
2768
+ DESCRIPTION:
2769
+ Internal function to process packages differences between Vantage and local.
2770
+ Note:
2771
+ * Raises a warning if the versions of certain Python packages are not consistent between Vantage and local.
2772
+
2773
+ PARAMETERS:
2774
+ func:
2775
+ Required Argument.
2776
+ Specifies the function name.
2777
+ Types: str
2778
+
2779
+ packages:
2780
+ Required Argument.
2781
+ Specifies the list of package names.
2782
+ Types: list of str
2783
+
2784
+ env:
2785
+ Optional Argument.
2786
+ Specifies the user environment for Vantage Cloud Lake.
2787
+ Types: str, object of class UserEnv
2788
+ Default Value: None
2789
+
2790
+ RETURNS:
2791
+ None
2792
+
2793
+ RAISES:
2794
+ OneTimeUserWarning
2795
+
2796
+ EXAMPLES:
2797
+ self._check_package_version_diff("apply", ["dill"], env)
2798
+ """
2799
+
2800
+ # Check if OSML required packages are verified or not.
2801
+ from teradataml.opensource._constants import _packages_verified_in_vantage
2802
+ _is_packages_verfied_in_vantage = _packages_verified_in_vantage.get(
2803
+ func, None)
2804
+ if _is_packages_verfied_in_vantage:
2805
+ return
2806
+
2807
+ if env:
2808
+ from teradataml.scriptmgmt.lls_utils import get_env
2809
+ from teradataml.scriptmgmt.UserEnv import UserEnv
2810
+ env = env if isinstance(env, UserEnv) else get_env(env)
2811
+ env_pkg_df = env.libs
2812
+ pkgs_dict = dict(zip(env_pkg_df['name'], env_pkg_df['version']))
2813
+
2814
+ from importlib.metadata import version
2815
+ warning_raised = False
2816
+ strr = []
2817
+ for pkg in packages:
2818
+ env_version = pkgs_dict.get(pkg)
2819
+ local_version = version(pkg)
2820
+ # Write the requirements file listing all the related packages and their versions
2821
+ # if the versions of Python packages are not consistent between Vantage and local.
2822
+ if env_version != local_version:
2823
+ warning_raised = True
2824
+ strr.append(f"{pkg}=={local_version}")
2825
+
2826
+ # If there are differences in package versions, display a warning message to the user
2827
+ # about the package differences and the requirements file created for the user to install the packages.
2828
+ if warning_raised:
2829
+ file_name = f"requirements_{func}.txt"
2830
+ req_file = os.path.join(GarbageCollector._get_temp_dir_name(), file_name)
2831
+ with open(req_file, "w") as f:
2832
+ f.write("\n".join(strr))
2833
+
2834
+ packages = "{}".format(packages[0]) if len(packages) == 1 else\
2835
+ "', '".join(packages[:-1]) + "' and '" + packages[-1]
2836
+
2837
+ if func == "apply":
2838
+ warning_msg = f"The version of certain Python packages are not consistent between Lake "\
2839
+ f"user environment and local. Teradata recommends to maintain same version of '{packages}' "\
2840
+ f"between Lake user environment and local for '{func}'."
2841
+ else:
2842
+ _packages_verified_in_vantage[func] = True
2843
+ warning_msg = "The versions of certain Python packages are not consistent between "\
2844
+ "Lake user environment and local. OpenSourceML compares the versions of '{}' "\
2845
+ f"(and also matches the patterns of these packages) used by 'td_{func}'. "\
2846
+ "Teradata recommends same versions for all the Python packages between Lake "\
2847
+ "user environment and local."
2848
+
2849
+ req = f"\nA requirements file listing all '{func}' " + \
2850
+ f"related packages and their versions has been written to '{req_file}'. "+ \
2851
+ "Update the Lake user environment with the required packages.\n"
2852
+
2853
+ warning_msg += req
2854
+ warnings.warn(warning_msg.format(packages), category=OneTimeUserWarning)
2855
+
2856
+ else:
2857
+ # Check if the versions of Python packages are consistent between Vantage and local.
2858
+ from teradataml.dbutils.dbutils import _db_python_package_version_diff
2859
+ all_package_versions = _db_python_package_version_diff(packages, only_diff=False)
2860
+ package_difference = \
2861
+ all_package_versions[all_package_versions.vantage != all_package_versions.local]
2862
+ # If there are differences in package versions, raise a warning.
2863
+ if package_difference.shape[0] > 0:
2864
+ strr = []
2865
+ # Write the requirements file listing all the related packages and their versions.
2866
+ for rec in all_package_versions.to_records():
2867
+ strr.append(f"{rec[1]}=={rec[2]}")
2868
+ file_name = f"requirements_{func}.txt"
2869
+ req_file = os.path.join(GarbageCollector._get_temp_dir_name(), file_name)
2870
+ with open(req_file, "w") as f:
2871
+ f.write("\n".join(strr))
2872
+
2873
+ packages = "{}".format(packages[0]) if len(packages) == 1 else\
2874
+ "', '".join(packages[:-1]) + "' and '" + packages[-1]
2875
+
2876
+ if func in ["map_row", "map_partition"]:
2877
+ warning_msg = f"The version of certain Python packages are not consistent between "\
2878
+ "Vantage and local. User can identify them using db_python_package_version_diff() "\
2879
+ f"function. Teradata recommends to maintain same version of '{packages}' "\
2880
+ f"between Vantage and local for '{func}'."
2881
+ else:
2882
+ _packages_verified_in_vantage[func] = True
2883
+ warning_msg = "The versions of certain Python packages are not consistent between "\
2884
+ "Vantage and local. User can identify them using db_python_package_version_diff() "\
2885
+ "function. OpenSourceML compares the versions of '{}' (and also matches the "\
2886
+ f"patterns of these packages) used by 'td_{func}'. Teradata "\
2887
+ "recommends to maintain same versions for all the Python packages between Vantage "\
2888
+ "and local."
2889
+
2890
+ # Display a warning message to the user about the package differences
2891
+ # and the requirements file created for the user to install the packages.
2892
+ req = f"\nA requirements file listing all '{func}' " + \
2893
+ f"related packages and their versions has been written to '{req_file}'.\n"
2894
+
2895
+ warning_msg += req
2896
+ warnings.warn(warning_msg.format(packages), category=OneTimeUserWarning)
2897
+
2898
+ @staticmethod
2899
+ def _get_dialect_quoted_name(object_name):
2900
+ """
2901
+ DESCRIPTION:
2902
+ Function to quote the SQL identifiers as per teradatasqlalchemy's quoting rules.
2903
+
2904
+ PARAMETERS:
2905
+ object_name
2906
+ Required Argument.
2907
+ Specifies the name of the SQL identifier to be quoted.
2908
+ Type: str
2909
+
2910
+ RAISES:
2911
+ None
2912
+
2913
+ RETURNS:
2914
+ Quoted object name.
2915
+
2916
+ EXAMPLES:
2917
+ _get_dialect_quoted_name(object_name = "tdml.alice")
2918
+
2919
+ OUTPUT:
2920
+ '"tdml.alice"'
2921
+ """
2922
+ tdp = preparer(td_dialect)
2923
+ return tdp.quote(object_name)
2924
+
2707
2925
 
2708
2926
  from teradataml.common.aed_utils import AedUtils
2709
2927
  from teradataml.dbutils.filemgr import remove_file