teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (126)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
teradataml/table_operators/Apply.py

@@ -316,14 +316,6 @@ class Apply(TableOperator):
   is_local_order,
   sort_ascending,
   nulls_first)
-
- # Set the variable specific to this child class.
- self.apply_command = apply_command
- self.env_name = env_name if env_name is not None else get_user_env()
- self.style = style
- self.returns = returns
- self._skip_argument_validation = False
-
   # Create AnalyticsWrapperUtils instance which contains validation functions.
   # This is required for is_default_or_not check.
   # Rest all validation is done using _Validators
@@ -332,20 +324,42 @@ class Apply(TableOperator):
   # Perform argument validation for arguments specific to this class.
   self.__arg_info_matrix = []

- self.__arg_info_matrix.append(["style", self.style, True, (str), True, ['CSV']])
- self.__arg_info_matrix.append(["env_name", self.env_name, False, (str, UserEnv), True])
- self.__arg_info_matrix.append(["apply_command", self.apply_command, False, (str), True])
- self.__arg_info_matrix.append(["returns", self.returns, True, (dict), True])
-
+ self.__arg_info_matrix.append(["style", style, True, (str), True, ['CSV']])
+ self.__arg_info_matrix.append(["env_name", env_name, False, (str, UserEnv), True])
+ self.__arg_info_matrix.append(["apply_command", apply_command, False, (str), True])
+ self.__arg_info_matrix.append(["returns", returns, True, (dict), True])
+ self._skip_argument_validation = False
   # Perform the function argument validations.
   self.__apply__validate()

- self.env = self.env_name if isinstance(self.env_name, UserEnv) else get_env(self.env_name)
+ # If user do not pass environment, get the default environment.
+ if env_name is None:
+ env_name = get_user_env()
+ self._open_af_env = env_name
+
+ # Set the variable specific to this child class.
+ self.apply_command = apply_command
+ self.env_name = env_name if isinstance(env_name, str) else env_name.env_name
+ self.style = style
+ self.returns = returns
+
+
+ @property
+ def env(self):
+ """
+ DESCRIPTION:
+ Getter to get environment.
+
+ RETURNS:
+ bool
+
+ RAISES:
+ None
+ """
+ if isinstance(self._open_af_env, str):
+ self._open_af_env = get_env(self._open_af_env)

- # User can specify object of UserEnv class. Or if environment is already created just pass
- # remote user environment name as string.
- if isinstance(self.env_name, UserEnv):
- self.env_name = self.env_name.env_name
+ return self._open_af_env

   @property
   def skip_argument_validation(self):
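The rewritten constructor no longer resolves the remote environment eagerly: it stores whatever was passed (a name or a UserEnv object) in `_open_af_env`, and the new `env` property resolves and caches it on first access. A minimal, self-contained sketch of that lazy-resolution pattern; the `_resolve` stand-in below is hypothetical, not teradataml's `get_env()`:

# Illustrative sketch only, not teradataml source: keep the environment
# reference as a plain string until first access, then resolve and cache it.
class _LazyEnvHolder:
    def __init__(self, env_name):
        # Holds either a name (str) or an already-resolved environment object.
        self._open_af_env = env_name

    @property
    def env(self):
        # Resolve the name only on first access; later accesses hit the cache.
        if isinstance(self._open_af_env, str):
            self._open_af_env = self._resolve(self._open_af_env)
        return self._open_af_env

    @staticmethod
    def _resolve(name):
        # Hypothetical stand-in for get_env(); returns a dummy object here.
        return {"env_name": name}


holder = _LazyEnvHolder("demo_env")
print(holder.env)  # resolved on first access
print(holder.env)  # cached thereafter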
teradataml/table_operators/Script.py

@@ -1701,7 +1701,9 @@ class Script(TableOperator):
   gc_on_quit=True, quote=False,
   table_type=table_type)
   try:
- if output_style == OutputStyle.OUTPUT_TABLE.value:
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+ UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+ elif output_style == OutputStyle.OUTPUT_TABLE.value:
   UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
   else:
   UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
teradataml/table_operators/TableOperator.py

@@ -458,7 +458,9 @@ class TableOperator:
   )

   try:
- if output_style == OutputStyle.OUTPUT_TABLE.value:
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+ UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+ elif output_style == OutputStyle.OUTPUT_TABLE.value:
   UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
   else:
   UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
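Both Script and TableOperator now check `configure.temp_object_type` before `output_style`, so a volatile-table preference overrides the requested output style when materializing the operator's temporary output. A rough sketch of that precedence, using plain stand-in strings rather than the real `TeradataConstants`/`OutputStyle` members:

# Illustrative sketch only: the branch order Script and TableOperator now share.
# "volatile_table", "table" and "view" are stand-ins for the real
# TeradataConstants.TERADATA_VOLATILE_TABLE and OutputStyle members.
def pick_output_object(temp_object_type, output_style):
    if temp_object_type == "volatile_table":
        # The volatile temp-object setting wins regardless of output_style.
        return "CREATE VOLATILE TABLE"
    elif output_style == "table":
        return "CREATE TABLE"
    return "CREATE VIEW"


print(pick_output_object("volatile_table", "view"))  # CREATE VOLATILE TABLE
print(pick_output_object(None, "table"))             # CREATE TABLE
print(pick_output_object(None, "view"))              # CREATE VIEW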
teradataml/table_operators/query_generator.py

@@ -481,6 +481,9 @@ class QueryGenerator:
   return configure.read_nos_function_mapping.upper()
   elif "WriteNOS".lower() == function_name.lower():
   return configure.write_nos_function_mapping.upper()
+ # If Table Operator function is IMAGE2MATRIX, then return alias name as TD_IMAGE2MATRIX.
+ elif "IMAGE2MATRIX".lower() == function_name.lower():
+ return "TD_IMAGE2MATRIX"

   engine_name = UtilFuncs._get_engine_name(self._engine)

teradataml/table_operators/table_operator_query_generator.py

@@ -231,7 +231,9 @@ class TableOperatorQueryGenerator(QueryGenerator):
   using_clause = ""
   # If the function is a NOS function, then USING clause is needed.
   if self._function_name.lower() in [configure.write_nos_function_mapping.lower(),
- configure.read_nos_function_mapping.lower()]:
+ configure.read_nos_function_mapping.lower(),
+ "td_image2matrix"
+ ]:
   using_clause = "USING"
   invocation_sql = "{0}\n\t{1}{2}".format(invocation_sql, using_clause, self.__OTHER_ARG_CLAUSE)

teradataml/table_operators/table_operator_util.py

@@ -24,6 +24,7 @@ from teradataml.utils.utils import execute_sql
   from teradataml.utils.validators import _Validators
   from functools import partial
   from inspect import isfunction, getsource
+ from pathlib import Path


   class _TableOperatorUtils:
@@ -281,7 +282,8 @@ class _TableOperatorUtils:
   """
   # Validate the user defined function.

- if self.operation == TableOperatorConstants.UDF_OP.value:
+ if self.operation in [TableOperatorConstants.UDF_OP.value,\
+ TableOperatorConstants.REGISTER_OP.value]:
   for udf_function in self.user_function:
   if not isfunction(udf_function):
   raise TypeError(Messages.get_message(
@@ -330,20 +332,30 @@ class _TableOperatorUtils:
   EXAMPLES:
   self.__create_user_script()
   """
- # Generate script name and alias, and add entry to a Garbage Collector.
- # script_entry is the string that is added to Garbage collector.
- # It has the format "<databasename>"."<file_id>".
- self.script_entry, self.script_alias, self.script_name, self.script_base_name = self.__get_script_name()
-
- # Get the converters to use with pandas.read_csv, and to correctly
- # typecast the numeric data.
- python_input_col_types = [UtilFuncs._teradata_type_to_python_type(col.type)
- for col in self.data._metaexpr.c]
- input_converters = UtilFuncs._get_pandas_converters(python_input_col_types)
-
- python_output_col_types = [UtilFuncs._teradata_type_to_python_type(type_)
- for type_ in list(self.returns.values())]
- output_converters = UtilFuncs._get_pandas_converters(python_output_col_types)
+ # If operation is register, then generate script name based on the
+ # user function name and return type.
+ # It has the format "tdml_udf_name_<registered_name>_udf_type_<return_type>_register.py"
+ if self.operation == TableOperatorConstants.REGISTER_OP.value:
+ registered_name = list(self.returns.keys())[0]
+ return_type = self.returns[registered_name]
+ self.script_name = "tdml_udf_name_{}_udf_type_{}_register.py".format(registered_name, return_type)
+ self.script_base_name = Path(self.script_name).stem
+ else:
+ # Generate script name and alias, and add entry to a Garbage Collector.
+ # script_entry is the string that is added to Garbage collector.
+ # It has the format "<databasename>"."<file_id>".
+ self.script_entry, self.script_alias, self.script_name, self.script_base_name = self.__get_script_name()
+
+ if self.operation not in [TableOperatorConstants.UDF_OP.value, TableOperatorConstants.REGISTER_OP.value]:
+ # Get the converters to use with pandas.read_csv, and to correctly
+ # typecast the numeric data.
+ python_input_col_types = [UtilFuncs._teradata_type_to_python_type(col.type)
+ for col in self.data._metaexpr.c]
+ input_converters = UtilFuncs._get_pandas_converters(python_input_col_types)
+
+ python_output_col_types = [UtilFuncs._teradata_type_to_python_type(type_)
+ for type_ in list(self.returns.values())]
+ output_converters = UtilFuncs._get_pandas_converters(python_output_col_types)

   # Create script in .teradataml directory.
   script_dir = GarbageCollector._get_temp_dir_name()
@@ -357,35 +369,16 @@ class _TableOperatorUtils:
   "templates")
   # Get the template.
   template = {TableOperatorConstants.APPLY_OP.value: TableOperatorConstants.APPLY_TEMPLATE.value,
- TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value}
+ TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value,
+ TableOperatorConstants.REGISTER_OP.value: TableOperatorConstants.REGISTER_TEMPLATE.value }
   template_name = template.get(self.operation, TableOperatorConstants.MAP_TEMPLATE.value)
   # Write to the script based on the template.
   try:
   with open(os.path.join(template_dir, template_name), 'r') as input_file:
   with open(self.script_path, 'w') as output_file:
   if self.operation == TableOperatorConstants.UDF_OP.value:
-
- # Function can have udf as decorator. Remove that.
- # The below notation
- # @udf
- # def to_upper(s):
- # return s.upper()
- # Then source code will be as it is.
- # But if below notation is used,
- # f = udf(to_upper)
- # Then source code will not have udf.
- # So, remove first line if it comes with first notation.
- # For both notations if in starting function defination have any extra space. Remove that.
- # If multiple UDF's are there append them as a single string.

- user_function_code = ""
- for udf_code in self.user_function:
- udf_code = getsource(udf_code)
- udf_code = udf_code.lstrip()
- if udf_code.startswith("@"):
- udf_code = udf_code[udf_code.find("\n")+1: ].lstrip()
- user_function_code += udf_code + '\n'
-
+ user_function_code = UtilFuncs._func_to_string(self.user_function)
   output_file.write(input_file.read().format(
   DELIMITER=self.delimiter,
   QUOTECHAR=self.quotechar,
@@ -396,6 +389,13 @@ class _TableOperatorUtils:
   COLUMNS_DEFINITIONS=json.dumps(self.columns_definitions),
   OUTPUT_TYPE_CONVERTERS=json.dumps(self.output_type_converters)
   ))
+ elif self.operation == TableOperatorConstants.REGISTER_OP.value:
+ # Get the source code of the user function.
+ user_function_code = UtilFuncs._func_to_string(self.user_function)
+ output_file.write(input_file.read().format(
+ FUNCTION_DEFINITION=user_function_code,
+ FUNCTION_NAME = self.user_function[0].__name__
+ ))
   else:
   # prepare script file from template file for maprow and mappartition.
   output_file.write(
@@ -494,7 +494,6 @@ class _TableOperatorUtils:
   script_name = script_alias # alias now contains extension also.

   # Extract the base name without extension.
- from pathlib import Path
   script_base_name = Path(script_alias).stem
   return script_entry, script_alias, script_name, script_base_name

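The inline decorator-stripping loop removed above is replaced by a call to `UtilFuncs._func_to_string()`. The removed code shows what that step does: take each user function's source, drop a leading `@udf`-style decorator line, and concatenate the definitions. A standalone sketch of that logic; the helper name `funcs_to_string` is illustrative only:

# Illustrative sketch only: what the removed inline logic did (and what a helper
# like UtilFuncs._func_to_string presumably encapsulates).
from inspect import getsource


def funcs_to_string(functions):
    code = ""
    for fn in functions:
        src = getsource(fn).lstrip()
        if src.startswith("@"):
            # Drop a leading decorator line such as "@udf", keep the def body.
            src = src[src.find("\n") + 1:].lstrip()
        code += src + "\n"
    return code


def to_upper(s):
    return s.upper()


print(funcs_to_string([to_upper]))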
teradataml/table_operators/templates/dataframe_register.template (new file)

@@ -0,0 +1,69 @@
+ import json
+ import sys, csv
+ import datetime
+ import urllib.parse
+
+ td_buffer = {{}}
+
+
+ {FUNCTION_DEFINITION}
+
+ # Decode the URL encoded string and store it back as dictionary.
+ dec = urllib.parse.unquote_plus(sys.argv[1])
+ script_data = json.loads(dec)
+
+ # Information that is required to help with the script usage.
+ # The delimiter to use with the input and output text.
+ delimiter = script_data["delimiter"]
+ # The quotechar to use.
+ quotechar = script_data["qoutechar"]
+ # The names of columns in the input teradataml DataFrame.
+ _input_columns = script_data["input_cols"]
+ # The names of columns in the output teradataml DataFrame.
+ _output_columns = script_data["output_cols"]
+ # The types of columns in the input/output teradataml DataFrame.
+ # The mapper of output column name to function arguments
+ function_args = script_data["function_args"]
+ # The definition for new columns in output.
+ columns_definitions = {{_output_columns[-1]: "{FUNCTION_NAME}"}}
+ output_type_converters = script_data["output_type_converters"]
+ for k,v in output_type_converters.items():
+ if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
+ output_type_converters[k] = 'str'
+ output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
+
+
+
+ # The entry point to the script.
+ if __name__ == "__main__":
+
+ records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
+ for record in records:
+ record = dict(zip(_input_columns, record))
+ out_rec = []
+ for column in _output_columns:
+
+ # If it is a new column, get the value from definition.
+ if column in columns_definitions:
+ f_args = tuple()
+ # Convert the argument types first.
+ for v in function_args[column]:
+ if v in _input_columns:
+ c_type_ = output_type_converters.get(v)
+ if record[v]:
+ # If it is a float, replace the empty character.
+ if c_type_.__name__ == 'float':
+ arg = output_type_converters.get(v)(record[v].replace(' ', ''))
+ else:
+ arg = output_type_converters.get(v)(record[v])
+ else:
+ arg = record[v]
+ else:
+ arg = v
+ f_args = f_args + (arg, )
+ func_ = globals()[columns_definitions[column]]
+ out_rec.append(output_type_converters[column](func_(*f_args)))
+ else:
+ out_rec.append(record[column])
+
+ print("{{}}".format(delimiter).join((str(i) for i in out_rec)))
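The new `dataframe_register.template` drives registered UDF execution: the generated script reads delimited rows from stdin, applies the registered function to the argument columns listed in `function_args`, appends the result as the last output column, and prints the row back out. A small self-contained sketch of that per-row flow with hypothetical sample data (a `word_count` function and a two-column record):

# Illustrative sketch only: hypothetical sample data pushed through the same
# per-row loop the generated register script uses.
import csv
import io


def word_count(text):
    # Stand-in for a registered user function.
    return len(text.split())


delimiter = ","
_input_columns = ["id", "review"]
_output_columns = ["id", "review", "n_words"]
function_args = {"n_words": ["review"]}          # output column -> argument columns
columns_definitions = {"n_words": "word_count"}  # output column -> function name

line = '1,"great value for the price"\n'
for record in csv.reader(io.StringIO(line), delimiter=delimiter, quotechar='"'):
    record = dict(zip(_input_columns, record))
    out_rec = []
    for column in _output_columns:
        if column in columns_definitions:
            args = tuple(record[a] for a in function_args[column])
            out_rec.append(globals()[columns_definitions[column]](*args))
        else:
            out_rec.append(record[column])
    print(delimiter.join(str(v) for v in out_rec))  # 1,great value for the price,5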
teradataml/utils/dtypes.py

@@ -293,10 +293,57 @@ class _DtypesMappers:
   VARCHAR: lambda x: "{0},{1}".format(x.__class__.__name__, x.length)
   }

+ # Holds mapping between string representation of teradatasqlalchemy type
+ # and actual teradatasqlalchemy type.
+ DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER = {
+ "CHAR": CHAR,
+ "VARCHAR": VARCHAR,
+
+ "BYTEINT": BYTEINT,
+ "SMALLINT": SMALLINT,
+ "INTEGER": INTEGER,
+ "BIGINT": BIGINT,
+
+ "REAL": FLOAT,
+ "FLOAT": FLOAT,
+ "DOUBLE": FLOAT,
+ "DECIMAL": DECIMAL,
+ "NUMBER": NUMBER,
+
+ "DATE": DATE,
+ "TIME": TIME,
+ "TIMESTAMP": TIMESTAMP,
+ "TIMESTAMP_WTZ": TIMESTAMP,
+
+ "BYTE": BYTE,
+ "VARBYTE": VARBYTE,
+ "BLOB": BLOB,
+ # TODO: Add CLOB type when support is added from OTF.
+
+ # TODO: Check these types when corresponding data type support
+ # is available from OTF support or not.
+ "INTERVAL_YEAR": INTERVAL_YEAR,
+ "INTERVAL_YTM": INTERVAL_YEAR_TO_MONTH,
+ "INTERVAL_MONTH": INTERVAL_MONTH,
+ "INTERVAL_DAY": INTERVAL_DAY,
+
+ "INTERVAL_DTH": INTERVAL_DAY_TO_HOUR,
+ "INTERVAL_DTM": INTERVAL_DAY_TO_MINUTE,
+ "INTERVAL_DTS": INTERVAL_DAY_TO_SECOND,
+ "INTERVAL_HOUR": INTERVAL_HOUR,
+ "INTERVAL_HTM": INTERVAL_HOUR_TO_MINUTE,
+ "INTERVAL_HTS": INTERVAL_HOUR_TO_SECOND,
+ "INTERVAL_MINUTE": INTERVAL_MINUTE,
+ "INTERVAL_MTS": INTERVAL_MINUTE_TO_SECOND,
+ "INTERVAL_SECOND": INTERVAL_SECOND
+ }
+
+
   class _SuppArgTypes:
   VAL_ARG_DATATYPE = (str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
   TIMESTAMP, VARCHAR)

+
   class _Dtypes:

   @staticmethod
@@ -641,11 +688,13 @@ class _Dtypes:

   """
   from teradataml.dataframe.dataframe import TDSeries, TDMatrix, TDGenSeries, TDAnalyticResult
+ from teradataml.store.feature_store.feature_store import Feature
   _DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.update({"SERIES": TDSeries,
   "MATRIX": TDMatrix,
   "ART": TDAnalyticResult,
- "GENSERIES": TDGenSeries})
-
+ "GENSERIES": TDGenSeries,
+ "COLUMN": (str, Feature),
+ "COLUMNS": (str, Feature)})

   return _DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.get(json_td_type.upper())

teradataml/utils/internal_buffer.py

@@ -82,3 +82,21 @@ class _InternalBuffer:
   """
   if key in cls.__data:
   return cls.__data.get(key)
+
+ @classmethod
+ def remove_key(cls, key):
+ """
+ DESCRIPTION:
+ Remove a particular key from the internal buffer.
+
+ RETURNS:
+ None
+
+ RAISES:
+ None
+
+ EXAMPLES:
+ # Remove all json objects from _InternalBuffer.
+ _InternalBuffer.remove_key("vs_session_id")
+ """
+ del cls.__data[key]
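`_InternalBuffer.remove_key()` deletes the entry with `del cls.__data[key]`, which raises `KeyError` for a missing key even though the docstring lists no raises, so callers presumably only invoke it for keys they know are set. A tiny stand-in buffer showing the same shape, with a guarded delete so a missing key is a no-op:

# Illustrative sketch only: a stand-in for the _InternalBuffer pattern.
class TinyBuffer:
    _data = {}

    @classmethod
    def add(cls, **kwargs):
        cls._data.update(kwargs)

    @classmethod
    def get(cls, key):
        return cls._data.get(key)

    @classmethod
    def remove_key(cls, key):
        # Guarded delete: silently ignore keys that are not present.
        if key in cls._data:
            del cls._data[key]


TinyBuffer.add(vs_session_id="abc123")
TinyBuffer.remove_key("vs_session_id")
TinyBuffer.remove_key("vs_session_id")  # second call is a no-op, no KeyError
print(TinyBuffer.get("vs_session_id"))  # None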
teradataml/utils/validators.py

@@ -1,3 +1,4 @@
+ import enum
  import numbers
  import os
  import pandas as pd
@@ -11,6 +12,8 @@ from teradataml.options.configure import configure
  from teradataml.dataframe.sql_interfaces import ColumnExpression
  from functools import wraps, reduce

+ from teradataml.utils.internal_buffer import _InternalBuffer
+

  def skip_validation():
  """
@@ -283,7 +286,8 @@ class _Validators:

  @staticmethod
  @skip_validation()
- def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False):
+ def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False,
+ case_insensitive=False):
  """
  Function to check whether column names in columns are present in given dataframe or not.
  This function is used currently only for Analytics wrappers.
@@ -309,12 +313,19 @@
  Specifies the name of the dataframe argument.
  Types: str

- isPartitionArg:
+ is_partition_arg:
  Optional Argument.
  Specifies a bool argument notifying, whether argument being validate is
  Partition argument or not.
  Types: bool

+ case_insensitive:
+ Optional Argument.
+ Specifies a bool argument notifying, whether to check column names
+ in case-insensitive manner or not.
+ Default Value: False
+ Types: bool
+
  RAISES:
  TeradataMlException - TDMLDF_COLUMN_IN_ARG_NOT_FOUND column(s) does not exist in a dataframe.

@@ -356,7 +367,7 @@
  try:
  # Check if its a sinlge column with one separator. For e.g. column:A.
  # If yes, just continue.
- _Validators._validate_column_exists_in_dataframe(column, data._metaexpr)
+ _Validators._validate_column_exists_in_dataframe(column, data._metaexpr, case_insensitive=case_insensitive)
  continue
  except:
  # User has provided range value.
@@ -379,7 +390,8 @@
  total_columns.append(column)

  return _Validators._validate_column_exists_in_dataframe(total_columns, data._metaexpr, column_arg=column_arg,
- data_arg=data_arg)
+ data_arg=data_arg, case_insensitive=case_insensitive)
+

  @staticmethod
  @skip_validation()
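Several validators gain a `case_insensitive` flag that is threaded down to `_validate_column_exists_in_dataframe()`. A minimal sketch of the kind of comparison such a flag enables (illustrative only; the real validator works against a DataFrame's metaexpression, not a plain list):

# Illustrative sketch only: case-insensitive column-name matching.
def column_exists(column, df_columns, case_insensitive=False):
    if case_insensitive:
        # Compare lower-cased names instead of requiring an exact match.
        return column.lower() in (c.lower() for c in df_columns)
    return column in df_columns


print(column_exists("AccountID", ["accountid", "txn_date"]))                         # False
print(column_exists("AccountID", ["accountid", "txn_date"], case_insensitive=True))  # True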
teradataml/utils/validators.py (continued)

@@ -545,7 +557,7 @@
  raise TypeError("Third element in argument information matrix should be bool.")

  if not (isinstance(args[3], tuple) or isinstance(args[3], type) or
- isinstance(args[3], (_ListOf, _TupleOf))):
+ isinstance(args[3], (_ListOf, _TupleOf)) or isinstance(args[3], enum.EnumMeta)):
  err_msg = "Fourth element in argument information matrix should be a 'tuple of types' or 'type' type."
  raise TypeError(err_msg)

@@ -1395,7 +1407,8 @@

  @staticmethod
  @skip_validation()
- def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True):
+ def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True,
+ case_insensitive=False):
  """
  Internal function to validate the column existence and type of an input DataFrame column against
  a list of unexpected types.
@@ -1461,7 +1474,7 @@

  # Check for column existence.
  if check_exist:
- _Validators._validate_column_exists_in_dataframe(col, df._metaexpr)
+ _Validators._validate_column_exists_in_dataframe(col, df._metaexpr, case_insensitive=case_insensitive)

  if isinstance(df[col].type, unexpected_types):
  if raise_error:
@@ -2274,4 +2287,82 @@
  MessageCodes.INVALID_ARG_VALUE).format(ip_address, "ip_address",
  'of four numbers (each between 0 and 255) separated by periods'))

- return True
+ return True
+
+
+ @staticmethod
+ @skip_validation()
+ def _check_auth_token(func_name):
+ """
+ DESCRIPTION:
+ Check if the user has set the authentication token.
+
+ PARAMETERS:
+ func_name:
+ Required Argument.
+ Specifies the function name where the authentication token is required.
+ Types: str
+
+ RAISES:
+ TeradataMLException
+
+ RETURNS:
+ None.
+
+ EXAMPLES:
+ >>> _Validators._check_auth_token("udf")
+ """
+ if _InternalBuffer.get("auth_token") is None:
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS,\
+ 'Auth Token', func_name,
+ 'set_auth_token'),
+ MessageCodes.SET_REQUIRED_PARAMS)
+
+ return True
+
+ @staticmethod
+ def _check_required_params(arg_value, arg_name, caller_func_name, target_func_name):
+ """
+ DESCRIPTION:
+ Check if the required argument is not None.
+
+ PARAMETERS:
+ arg_value:
+ Required Argument.
+ Specifies the argument value to be
+ checked for non None values.
+ Types: str, float, int, bool
+
+ arg_name:
+ Required Argument.
+ Specifies the argument name.
+ Types: str
+
+ caller_func_name:
+ Required Argument.
+ Specifies the function name which calls this function.
+ This is required for the error message.
+ Types: str
+
+ target_func_name:
+ Required Argument.
+ Specifies the function name which the user needs to call
+ so that the error is fixed.
+ This is required for the error message.
+ Types: str
+
+ RAISES:
+ TeradataMLException
+
+ RETURNS:
+ True.
+
+ EXAMPLES:
+ >>> _Validators._check_required_params("udf", "arg_name")
+ """
+ if arg_value is None:
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS, \
+ arg_name, caller_func_name,
+ target_func_name),
+ MessageCodes.SET_REQUIRED_PARAMS)
+ return True
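`_check_auth_token()` and `_check_required_params()` follow the same fail-fast pattern: if the prerequisite is missing, raise an error whose message names both the calling function and the setup call the user still has to make (`set_auth_token`, for example). A self-contained sketch of that pattern, using a plain `ValueError` in place of `TeradataMlException` and its SET_REQUIRED_PARAMS message code:

# Illustrative sketch only: fail fast and point the user at the fix.
def check_required_param(arg_value, arg_name, caller_func_name, target_func_name):
    if arg_value is None:
        raise ValueError(
            "'{}' is required by {}(); set it first using {}().".format(
                arg_name, caller_func_name, target_func_name))
    return True


check_required_param("dummy-token", "Auth Token", "udf", "set_auth_token")  # returns True

try:
    check_required_param(None, "Auth Token", "udf", "set_auth_token")
except ValueError as err:
    print(err)  # 'Auth Token' is required by udf(); set it first using set_auth_token().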