teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic; see the registry's advisory page for more details.

Files changed (151)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +193 -1
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +25 -18
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  8. teradataml/analytics/sqle/__init__.py +20 -2
  9. teradataml/analytics/utils.py +15 -1
  10. teradataml/analytics/valib.py +18 -4
  11. teradataml/automl/__init__.py +341 -112
  12. teradataml/automl/autodataprep/__init__.py +471 -0
  13. teradataml/automl/data_preparation.py +84 -42
  14. teradataml/automl/data_transformation.py +69 -33
  15. teradataml/automl/feature_engineering.py +76 -9
  16. teradataml/automl/feature_exploration.py +639 -25
  17. teradataml/automl/model_training.py +35 -14
  18. teradataml/clients/auth_client.py +2 -2
  19. teradataml/common/__init__.py +1 -2
  20. teradataml/common/constants.py +122 -63
  21. teradataml/common/messagecodes.py +14 -3
  22. teradataml/common/messages.py +8 -4
  23. teradataml/common/sqlbundle.py +40 -10
  24. teradataml/common/utils.py +366 -74
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +348 -86
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/apriori_example.json +22 -0
  29. teradataml/data/byom_example.json +11 -0
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  37. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  38. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  40. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  41. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  42. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  43. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  45. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  49. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  51. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  52. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  53. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  54. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  55. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  57. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  58. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  59. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  60. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  61. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  62. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  63. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  64. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  65. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  67. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  68. teradataml/data/hnsw_alter_data.csv +5 -0
  69. teradataml/data/hnsw_data.csv +10 -0
  70. teradataml/data/jsons/byom/h2opredict.json +1 -1
  71. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  72. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  73. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  74. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  75. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  76. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  77. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  78. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  79. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  80. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  81. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  82. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  83. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  84. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  85. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  86. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  87. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  88. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  89. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  90. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  91. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  92. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  93. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  94. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
  95. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
  96. teradataml/data/ner_dict.csv +8 -0
  97. teradataml/data/ner_input_eng.csv +7 -0
  98. teradataml/data/ner_rule.csv +5 -0
  99. teradataml/data/pos_input.csv +40 -0
  100. teradataml/data/tdnerextractor_example.json +14 -0
  101. teradataml/data/teradataml_example.json +21 -0
  102. teradataml/data/textmorph_example.json +5 -0
  103. teradataml/data/to_num_data.csv +4 -0
  104. teradataml/data/tochar_data.csv +5 -0
  105. teradataml/data/trans_dense.csv +16 -0
  106. teradataml/data/trans_sparse.csv +55 -0
  107. teradataml/data/vectordistance_example.json +1 -1
  108. teradataml/dataframe/copy_to.py +45 -29
  109. teradataml/dataframe/data_transfer.py +72 -46
  110. teradataml/dataframe/dataframe.py +642 -166
  111. teradataml/dataframe/dataframe_utils.py +167 -22
  112. teradataml/dataframe/functions.py +135 -20
  113. teradataml/dataframe/setop.py +11 -6
  114. teradataml/dataframe/sql.py +330 -78
  115. teradataml/dbutils/dbutils.py +556 -140
  116. teradataml/dbutils/filemgr.py +14 -10
  117. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  118. teradataml/lib/aed_0_1.dll +0 -0
  119. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
  120. teradataml/opensource/_class.py +141 -17
  121. teradataml/opensource/{constants.py → _constants.py} +7 -3
  122. teradataml/opensource/_lightgbm.py +52 -53
  123. teradataml/opensource/_sklearn.py +1008 -0
  124. teradataml/opensource/_wrapper_utils.py +5 -5
  125. teradataml/options/__init__.py +47 -15
  126. teradataml/options/configure.py +103 -26
  127. teradataml/options/display.py +13 -2
  128. teradataml/plot/axis.py +47 -8
  129. teradataml/plot/figure.py +33 -0
  130. teradataml/plot/plot.py +63 -13
  131. teradataml/scriptmgmt/UserEnv.py +307 -40
  132. teradataml/scriptmgmt/lls_utils.py +428 -145
  133. teradataml/store/__init__.py +2 -3
  134. teradataml/store/feature_store/feature_store.py +102 -7
  135. teradataml/table_operators/Apply.py +48 -19
  136. teradataml/table_operators/Script.py +23 -2
  137. teradataml/table_operators/TableOperator.py +3 -1
  138. teradataml/table_operators/table_operator_util.py +58 -9
  139. teradataml/utils/dtypes.py +49 -1
  140. teradataml/utils/internal_buffer.py +38 -0
  141. teradataml/utils/validators.py +377 -62
  142. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
  143. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
  144. teradataml/data/SQL_Fundamentals.pdf +0 -0
  145. teradataml/libaed_0_1.dylib +0 -0
  146. teradataml/libaed_0_1.so +0 -0
  147. teradataml/opensource/sklearn/__init__.py +0 -0
  148. teradataml/store/vector_store/__init__.py +0 -1586
  149. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  150. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  151. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -28,13 +28,16 @@ def skip_validation():
28
28
  def validation_func(): ...
29
29
 
30
30
  """
31
+
31
32
  def decorator(func):
32
33
  def wrapper(*args, **kwargs):
33
34
  # If "skip_all" flag is set to False,
34
35
  # skip all validation execution.
35
36
  if not _Validators.skip_all:
36
37
  return func(*args, **kwargs)
38
+
37
39
  return wraps(func)(wrapper)
40
+
38
41
  return decorator
39
42
 
40
43
 
@@ -286,7 +289,8 @@ class _Validators:
286
289
 
287
290
  @staticmethod
288
291
  @skip_validation()
289
- def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False):
292
+ def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False,
293
+ case_insensitive=False):
290
294
  """
291
295
  Function to check whether column names in columns are present in given dataframe or not.
292
296
  This function is used currently only for Analytics wrappers.
@@ -312,12 +316,19 @@ class _Validators:
312
316
  Specifies the name of the dataframe argument.
313
317
  Types: str
314
318
 
315
- isPartitionArg:
319
+ is_partition_arg:
316
320
  Optional Argument.
317
321
  Specifies a bool argument notifying, whether argument being validate is
318
322
  Partition argument or not.
319
323
  Types: bool
320
324
 
325
+ case_insensitive:
326
+ Optional Argument.
327
+ Specifies a bool argument notifying, whether to check column names
328
+ in case-insensitive manner or not.
329
+ Default Value: False
330
+ Types: bool
331
+
321
332
  RAISES:
322
333
  TeradataMlException - TDMLDF_COLUMN_IN_ARG_NOT_FOUND column(s) does not exist in a dataframe.
323
334
 
@@ -338,7 +349,7 @@ class _Validators:
338
349
  columns = [columns]
339
350
 
340
351
  total_columns = []
341
-
352
+
342
353
  for column in columns:
343
354
  for separator in TeradataConstants.RANGE_SEPARATORS.value:
344
355
  if column is None:
@@ -357,32 +368,33 @@ class _Validators:
357
368
  # ':' specifies all columns in the table.
358
369
 
359
370
  try:
360
- # Check if its a sinlge column with one separator. For e.g. column:A.
371
+ # Check if it's a single column with one separator. For e.g. column:A.
361
372
  # If yes, just continue.
362
- _Validators._validate_column_exists_in_dataframe(column, data._metaexpr)
373
+ _Validators._validate_column_exists_in_dataframe(column, data._metaexpr,
374
+ case_insensitive=case_insensitive)
363
375
  continue
364
376
  except:
365
377
  # User has provided range value.
366
378
  column_names = column.split(separator)
367
379
  if (len(column_names) == 2 and
368
- any([column_names[0].isdigit(), column_names[1].isdigit()]) and
369
- not all([column_names[0].isdigit(), column_names[1].isdigit()]) and
370
- not "" in column_names):
371
- # Raises Exception if column range has mixed types. For e.g. "4:XYZ".
372
- err_msg = Messages.get_message(MessageCodes.MIXED_TYPES_IN_COLUMN_RANGE)
373
- raise ValueError(err_msg.format(column_arg))
374
-
380
+ any([column_names[0].isdigit(), column_names[1].isdigit()]) and
381
+ not all([column_names[0].isdigit(), column_names[1].isdigit()]) and
382
+ not "" in column_names):
383
+ # Raises Exception if column range has mixed types. For e.g. "4:XYZ".
384
+ err_msg = Messages.get_message(MessageCodes.MIXED_TYPES_IN_COLUMN_RANGE)
385
+ raise ValueError(err_msg.format(column_arg))
386
+
375
387
  for col in column_names:
376
388
  if not col.isdigit() and col != "":
377
389
  total_columns.append(col)
378
390
 
379
391
  elif column.count(separator) > 1:
380
- continue
392
+ continue
381
393
  else:
382
394
  total_columns.append(column)
383
395
 
384
396
  return _Validators._validate_column_exists_in_dataframe(total_columns, data._metaexpr, column_arg=column_arg,
385
- data_arg=data_arg)
397
+ data_arg=data_arg, case_insensitive=case_insensitive)
386
398
 
387
399
  @staticmethod
388
400
  @skip_validation()
@@ -446,9 +458,9 @@ class _Validators:
446
458
  if isinstance(columns, str):
447
459
  columns = [columns]
448
460
 
449
- # Constructing New unquotted column names for selected columns ONLY using Parent _metaexpr
461
+ # Constructing New unquoted column names for selected columns ONLY using Parent _metaexpr
450
462
  if case_insensitive:
451
- # If lookup has to be a case insensitive then convert the
463
+ # If lookup has to be a case-insensitive then convert the
452
464
  # metaexpr columns name to lower case.
453
465
  unquoted_df_columns = [c.name.replace('"', "").lower() for c in metaexpr.c]
454
466
  else:
@@ -459,13 +471,10 @@ class _Validators:
459
471
  if column_name is None:
460
472
  column_name = str(column_name)
461
473
 
462
- if case_insensitive:
463
- # If lookup has to be a case insensitive then convert the
464
- # column name to lower case.
465
- column_name = column_name.lower()
474
+ case_based_column_name = column_name.lower() if case_insensitive else column_name
466
475
 
467
476
  # If column name does not exist in metaexpr, raise the exception
468
- if not column_name.replace('"', "") in unquoted_df_columns:
477
+ if not case_based_column_name.replace('"', "") in unquoted_df_columns:
469
478
  if column_arg and data_arg:
470
479
  raise ValueError(Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND,
471
480
  column_name,
@@ -474,7 +483,8 @@ class _Validators:
474
483
  "Table" if for_table else "DataFrame"))
475
484
  else:
476
485
  raise ValueError(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL,
477
- column_name, sorted(unquoted_df_columns)))
486
+ column_name,
487
+ sorted([c.name.replace('"', "") for c in metaexpr.c])))
478
488
 
479
489
  return True
480
490
 
@@ -509,7 +519,7 @@ class _Validators:
509
519
 
510
520
  @staticmethod
511
521
  @skip_validation()
512
- def _validate_function_arguments(arg_list, skip_empty_check = None):
522
+ def _validate_function_arguments(arg_list, skip_empty_check=None):
513
523
  """
514
524
  Method to verify that the input arguments are of valid data type except for
515
525
  argument of DataFrameType.
@@ -534,10 +544,29 @@ class _Validators:
534
544
  EXAMPLES:
535
545
  _Validators._validate_function_arguments(arg_list)
536
546
  """
547
+ # arg_list is list of list, where each inner list can have maximum 6 elements
548
+ # and must have minimum (first) 4 elements:
549
+ # Consider following inner list.
550
+ # [element1, element2, element3, element4, element5, element6]
551
+ # Corresponds to:
552
+ # [<1_arg_name>, <2_arg_value>, <3_is_optional>, <4_tuple_of_accepted_types>,
553
+ # <5_empty_not_allowed>, <6_list_of_permitted_values>]
554
+ # e.g.
555
+ # arg_list = [["join", join, True, (str), True, concat_join_permitted_values]]
556
+ # 1. element1 --> Argument Name, a string. ["join" in above example.]
557
+ # 2. element2 --> Argument itself. [join]
558
+ # 3. element3 --> Specifies a flag that mentions if argument is optional or not.
559
+ # False means required argument and True means optional argument.
560
+ # 4. element4 --> Tuple of accepted types. (str) in above example.
561
+ # 5. element5 --> True, means validate for empty value. Error will be raised, if empty values are passed.
562
+ # If not specified, argument value will not be validated for empty value.
563
+ # 6. element6 --> A list of permitted values, an argument can accept.
564
+ # If not specified, argument value will not be validated against any permitted values.
565
+ # If a list is passed, validation will be performed for permitted values.
537
566
  invalid_arg_names = []
538
567
  invalid_arg_types = []
539
568
 
540
- typeCheckFailed = False
569
+ type_check_failed = False
541
570
 
542
571
  for args in arg_list:
543
572
  num_args = len(args)
@@ -560,7 +589,7 @@ class _Validators:
560
589
  # Let's validate argument types.
561
590
  #
562
591
  # Verify datatypes for arguments which are required or the optional arguments are not None
563
- if ((args[2] == True and args[1] is not None) or (args[2] == False)):
592
+ if (args[2] == True and args[1] is not None) or (args[2] == False):
564
593
  # Validate the types of argument, if expected types are instance of tuple or type
565
594
  dtype_list = _Validators.__getTypeAsStr(args[3])
566
595
 
@@ -577,13 +606,13 @@ class _Validators:
577
606
  if not _Validators._check_isinstance(value, args[3]):
578
607
  invalid_arg_names.append(args[0])
579
608
  invalid_arg_types.append(valid_types_str)
580
- typeCheckFailed = True
609
+ type_check_failed = True
581
610
  break
582
611
  elif not _Validators._check_isinstance(args[1], args[3]):
583
612
  # Argument is not of type list.
584
613
  invalid_arg_names.append(args[0])
585
614
  invalid_arg_types.append(valid_types_str)
586
- typeCheckFailed = True
615
+ type_check_failed = True
587
616
 
588
617
  elif isinstance(args[3], tuple):
589
618
  # Argument can accept values of multiple types, but not list.
@@ -591,30 +620,30 @@ class _Validators:
591
620
  if not _Validators._check_isinstance(args[1], args[3]):
592
621
  invalid_arg_names.append(args[0])
593
622
  invalid_arg_types.append(valid_types_str)
594
- typeCheckFailed = True
623
+ type_check_failed = True
595
624
  else:
596
625
  # Argument can accept values of single type.
597
626
  valid_types_str = " or ".join(dtype_list)
598
627
  if not _Validators._check_isinstance(args[1], args[3]):
599
628
  invalid_arg_names.append(args[0])
600
629
  invalid_arg_types.append(valid_types_str)
601
- typeCheckFailed = True
630
+ type_check_failed = True
602
631
 
603
632
  #
604
633
  # Validate the arguments for empty value
605
634
  #
606
- if not typeCheckFailed and len(args) >= 5:
635
+ if not type_check_failed and len(args) >= 5:
607
636
  if args[4]:
608
637
  _Validators._validate_input_columns_not_empty(args[1], args[0], skip_empty_check)
609
638
 
610
639
  #
611
640
  # Validate the arguments for permitted values
612
641
  #
613
- if not typeCheckFailed and len(args) >= 6:
642
+ if not type_check_failed and len(args) >= 6:
614
643
  if args[5] is not None:
615
644
  _Validators._validate_permitted_values(args[1], args[5], args[0], supported_types=args[3])
616
645
 
617
- if typeCheckFailed:
646
+ if type_check_failed:
618
647
  if len(invalid_arg_names) != 0:
619
648
  raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
620
649
  invalid_arg_names, invalid_arg_types))
@@ -623,7 +652,7 @@ class _Validators:
623
652
 
624
653
  @staticmethod
625
654
  @skip_validation()
626
- def _validate_input_columns_not_empty(arg, arg_name, skip_empty_check = None):
655
+ def _validate_input_columns_not_empty(arg, arg_name, skip_empty_check=None):
627
656
  """
628
657
  Function to check whether argument is empty string or not.
629
658
 
@@ -655,7 +684,7 @@ class _Validators:
655
684
  for col in arg:
656
685
  if not (skip_empty_check and arg_name in skip_empty_check.keys() and col in skip_empty_check[arg_name]):
657
686
  if isinstance(col, str):
658
- if (not (col is None) and ((len(col.strip()) == 0))):
687
+ if (not (col is None)) and (len(col.strip()) == 0):
659
688
  raise ValueError(Messages.get_message(MessageCodes.ARG_EMPTY, arg_name))
660
689
  return True
661
690
 
@@ -695,8 +724,8 @@ class _Validators:
695
724
  if args[2] == False and args[1] is None:
696
725
  miss_args.append(args[0])
697
726
 
698
- if (len(miss_args)>0):
699
- raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS,miss_args),
727
+ if len(miss_args) > 0:
728
+ raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, miss_args),
700
729
  MessageCodes.MISSING_ARGS)
701
730
  return True
702
731
 
@@ -804,7 +833,7 @@ class _Validators:
804
833
  invalid_values.sort()
805
834
 
806
835
  # Concatenate the message for datatypes not present in datatypes of permitted_values.
807
- if len(add_types) > 0 :
836
+ if len(add_types) > 0:
808
837
  add_types = _Validators.__getTypeAsStr(add_types)
809
838
  add_types = " or ".join(add_types)
810
839
  permitted_values = "{} {}".format(permitted_values, "or any values of type {}".format(add_types))
@@ -969,7 +998,7 @@ class _Validators:
969
998
  return True
970
999
 
971
1000
  is_arg_in_lower_bound, is_arg_in_upper_bound = True, True
972
- lbound_msg, ubound_msg = "", ""
1001
+ lbound_msg, ubound_msg = "", ""
973
1002
 
974
1003
  # Check for lower bound.
975
1004
  if lbound is not None:
@@ -1062,7 +1091,7 @@ class _Validators:
1062
1091
  if timebucket_duration is None:
1063
1092
  return True
1064
1093
 
1065
- # Check if notation if formal or shorthand (beginning with a digit)
1094
+ # Check if notation is formal or shorthand (beginning with a digit)
1066
1095
  if timebucket_duration[0].isdigit():
1067
1096
  valid_timebucket_durations = PTITableConstants.VALID_TIMEBUCKET_DURATIONS_SHORTHAND.value
1068
1097
  pattern_to_use = PTITableConstants.PATTERN_TIMEBUCKET_DURATION_SHORT.value
@@ -1090,7 +1119,7 @@ class _Validators:
1090
1119
 
1091
1120
  @staticmethod
1092
1121
  @skip_validation()
1093
- def _validate_column_type(df, col, col_arg, expected_types, raiseError = True):
1122
+ def _validate_column_type(df, col, col_arg, expected_types, raiseError=True):
1094
1123
  """
1095
1124
  Internal function to validate the type of an input DataFrame column against
1096
1125
  a list of expected types.
@@ -1114,7 +1143,7 @@ class _Validators:
1114
1143
 
1115
1144
  expected_types:
1116
1145
  Required Argument.
1117
- Specifies a list of teradatasqlachemy datatypes that the column is
1146
+ Specifies a list of teradatasqlalchemy datatypes that the column is
1118
1147
  expected to be of type.
1119
1148
  Types: list of teradatasqlalchemy types
1120
1149
 
@@ -1125,10 +1154,10 @@ class _Validators:
1125
1154
  Types: bool
1126
1155
 
1127
1156
  RETURNS:
1128
- True, when the columns is of an expected type.
1157
+ True, when the column is of an expected type.
1129
1158
 
1130
1159
  RAISES:
1131
- TeradataMlException, when the columns is not one of the expected types.
1160
+ TeradataMlException, when the column is not one of the expected types.
1132
1161
 
1133
1162
  EXAMPLES:
1134
1163
  _Validators._validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES)
@@ -1398,7 +1427,157 @@ class _Validators:
1398
1427
 
1399
1428
  @staticmethod
1400
1429
  @skip_validation()
1401
- def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True):
1430
+ def _validate_mutually_exclusive_argument_groups(*arg_groups, all_falsy_check=False,
1431
+ return_all_falsy_status=False):
1432
+ """
1433
+ DESCRIPTION:
1434
+ Function to validate mutual exclusiveness of group of arguments.
1435
+
1436
+ PARAMETERS:
1437
+ *arg_groups:
1438
+ Specifies variable length argument list where each argument in list is a dictionary
1439
+ representing one group of arguments which should be mutually exclusive from
1440
+ other groups of arguments. Each dictionary contains key-value pairs for argument
1441
+ name and its value.
1442
+
1443
+ all_falsy_check:
1444
+ Optional Argument.
1445
+ Specifies whether to throw Teradataml Exception when all arguments in all argument
1446
+ groups hold Falsy/null values.
1447
+ Types: bool
1448
+
1449
+ return_all_falsy_status:
1450
+ Optional Argument.
1451
+ Specifies whether to return the boolean flag which states if all arguments in all argument
1452
+ groups hold Falsy/null values.
1453
+ Types: bool
1454
+
1455
+ RETURNS:
1456
+ * When "return_all_falsy_status" is True:
1457
+ * True: If all arguments in all argument groups hold Falsy/null values.
1458
+ * False: If all arguments in all argument groups do not hold Falsy/null values.
1459
+ * When "return_all_falsy_status" is False:
1460
+ None
1461
+ RAISES:
1462
+ TeradataMLException
1463
+
1464
+ EXAMPLES:
1465
+ # Example 1: When groups of arguments are not mutually exclusive.
1466
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": "arg1"},
1467
+ ... {"arg2": "arg2"},
1468
+ ... {"arg3": "arg3", "arg4": "arg4"})
1469
+ [Teradata][teradataml](TDML_2061) Provide either '['arg1']' argument(s) or '['arg2']' argument(s) or '['arg3', 'arg4']' argument(s).
1470
+
1471
+ # Example 2: When groups of arguments are mutually exclusive.
1472
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
1473
+ ... {"arg2": ""},
1474
+ ... {"arg3": "arg3", "arg4": "arg4"})
1475
+
1476
+ # Example 3: When all groups of arguments hold falsy values
1477
+ # and "all_falsy_check" is set to True.
1478
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
1479
+ ... {"arg2": None},
1480
+ ... {"arg3": None, "arg4": None},
1481
+ ... all_falsy_check=True)
1482
+ [Teradata][teradataml](TDML_2061) Provide either '['arg1']' argument(s) or '['arg2']' argument(s) or '['arg3', 'arg4']' argument(s).
1483
+
1484
+ # Example 4: When all groups of arguments hold falsy values
1485
+ # and "all_falsy_check" is set to False.
1486
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
1487
+ ... {"arg2": None},
1488
+ ... {"arg3": None, "arg4": None})
1489
+
1490
+ # Example 5: When all groups of arguments hold falsy values
1491
+ # and "all_falsy_check" is set to False and
1492
+ # "return_all_falsy_status" is set to True.
1493
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
1494
+ ... {"arg2": None},
1495
+ ... {"arg3": None, "arg4": None},
1496
+ ... return_all_falsy_status=True)
1497
+ True
1498
+ """
1499
+ all_groups_falsy = True
1500
+ mutually_exclusive_groups = True
1501
+ non_falsy_groups = []
1502
+ for arg_grp in arg_groups:
1503
+ # TODO: Handling of falsy values can be done in more appropriate way by
1504
+ # differentiating None/empty string/empty list.
1505
+ is_group_falsy = not any(value for value in arg_grp.values())
1506
+ if not is_group_falsy:
1507
+ non_falsy_groups.append(arg_grp)
1508
+
1509
+ # Current group is having non-falsy values and already traversed
1510
+ # group(s) also has(have) non-falsy values. So set "mutually_exclusive_groups" to False.
1511
+ if not all_groups_falsy:
1512
+ mutually_exclusive_groups = False
1513
+
1514
+ all_groups_falsy = all_groups_falsy and is_group_falsy
1515
+
1516
+ # Raise error if any one of the below-mentioned conditions is True:
1517
+ # More than one group has non-falsy values.
1518
+ # All groups have all falsy values and "all_falsy_check" is True.
1519
+ if not mutually_exclusive_groups or (all_falsy_check and all_groups_falsy):
1520
+ if not non_falsy_groups:
1521
+ non_falsy_groups = [str(list(arg_grp.keys())) for arg_grp in arg_groups]
1522
+ else:
1523
+ non_falsy_groups = [str(list(non_falsy_group.keys())) for non_falsy_group in non_falsy_groups]
1524
+ error_msg = Messages.get_message(
1525
+ MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, str(non_falsy_groups[0]),
1526
+ "' argument(s) or \'".join(non_falsy_groups[1:]))
1527
+
1528
+ raise TeradataMlException(error_msg, MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
1529
+
1530
+ if return_all_falsy_status:
1531
+ return all_groups_falsy
1532
+
1533
+ @staticmethod
1534
+ @skip_validation()
1535
+ def _validate_mutually_inclusive_n_arguments(**kwargs):
1536
+ """
1537
+ DESCRIPTION:
1538
+ Function to validate mutual inclusiveness of group of arguments.
1539
+
1540
+ PARAMETERS:
1541
+ **kwargs:
1542
+ Specifies variable number of keyword arguments which are to be
1543
+ validated for mutual inclusiveness.
1544
+
1545
+ RETURNS:
1546
+ True, if arguments are mutually inclusive.
1547
+
1548
+ RAISES:
1549
+ TeradataMLException
1550
+
1551
+ EXAMPLES:
1552
+ Example 1: When all arguments have non-None values.
1553
+ >>> _Validators._validate_mutually_inclusive_n_arguments(arg1="arg1", arg2="arg2",
1554
+ ... arg3="arg3", arg4="arg4")
1555
+ True
1556
+
1557
+ Example 2: When one of the arguments is empty string.
1558
+ >>> _Validators._validate_mutually_inclusive_n_arguments(arg1="arg1", arg2="arg2",
1559
+ ... arg3="arg3", arg4="")
1560
+ TeradataMlException
1561
+
1562
+ Example 3: When one of the arguments is None.
1563
+ >>> _Validators._validate_mutually_inclusive_n_arguments(arg1="arg1", arg2=None,
1564
+ ... arg3="arg3", arg4="arg4")
1565
+ TeradataMlException
1566
+ """
1567
+ # TODO: Handling of falsy values can be done in more appropriate way by
1568
+ # differentiating None/empty string/empty list.
1569
+ if all(arg_value for arg, arg_value in kwargs.items()):
1570
+ return True
1571
+ else:
1572
+ arg_list = list(kwargs.keys())
1573
+ message = Messages.get_message(MessageCodes.MUST_PASS_ARGUMENT,
1574
+ arg_list[0], " and ".join(arg_list[1:]))
1575
+ raise TeradataMlException(message, MessageCodes.MUST_PASS_ARGUMENT)
1576
+
1577
+ @staticmethod
1578
+ @skip_validation()
1579
+ def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True,
1580
+ case_insensitive=False):
1402
1581
  """
1403
1582
  Internal function to validate the column existence and type of an input DataFrame column against
1404
1583
  a list of unexpected types.
@@ -1464,7 +1643,7 @@ class _Validators:
1464
1643
 
1465
1644
  # Check for column existence.
1466
1645
  if check_exist:
1467
- _Validators._validate_column_exists_in_dataframe(col, df._metaexpr)
1646
+ _Validators._validate_column_exists_in_dataframe(col, df._metaexpr, case_insensitive=case_insensitive)
1468
1647
 
1469
1648
  if isinstance(df[col].type, unexpected_types):
1470
1649
  if raise_error:
@@ -1547,6 +1726,60 @@ class _Validators:
1547
1726
  raise TeradataMlException(error_msg.replace("(not None)", "as '{}'".format(msg_arg_value)),
1548
1727
  MessageCodes.DEPENDENT_ARGUMENT)
1549
1728
  return True
1729
+
1730
+ @staticmethod
1731
+ @skip_validation()
1732
+ def _validate_dependent_method(dependent_mtd, independent_mtd, independent_mtd_calls):
1733
+ """
1734
+ DESCRIPTION:
1735
+ Function validates if an independent method has been called before a dependent method.
1736
+ Raises an error if the independent method is not called before the dependent method is called,
1737
+ otherwise, returns True.
1738
+
1739
+ PARAMETERS:
1740
+ dependent_mtd:
1741
+ Required Argument.
1742
+ Specifies the name of dependent method.
1743
+ Types: String
1744
+
1745
+ independent_mtd:
1746
+ Required Argument.
1747
+ Specifies the name of independent method.
1748
+ Types: String or List of Strings
1749
+
1750
+ independent_mtd_calls:
1751
+ Required Argument.
1752
+ Specifies the flag to check whether independent method is called or not.
1753
+ Types: bool or List of bool
1754
+
1755
+ RETURNS:
1756
+ True, when the independent method is called before the dependent method.
1757
+
1758
+ RAISES:
1759
+ TeradataMlException, when independent method is not called before the
1760
+ dependent method.
1761
+
1762
+ EXAMPLES:
1763
+ _Validators._validate_dependent_method("dependent_method", "independent_method", False)
1764
+ _Validators._validate_dependent_method("dependent_method", "independent_method", True)
1765
+ _Validators._validate_dependent_method("dependent_method", ["independent_method1", "independent_method2"], [False, False])
1766
+ """
1767
+ # Check if all independent method calls are False
1768
+ independent_mtd_calls = [independent_mtd_calls] \
1769
+ if not isinstance(independent_mtd_calls, list) else independent_mtd_calls
1770
+ all_false = all(not value for value in independent_mtd_calls)
1771
+
1772
+ # Check if any of the independent method is called before dependent method
1773
+ if dependent_mtd and all_false:
1774
+ error_code = MessageCodes.DEPENDENT_METHOD
1775
+
1776
+ if isinstance(independent_mtd, str):
1777
+ independent_mtd = [independent_mtd]
1778
+ independent_mtd = ' or '.join(f"'{item}'" for item in independent_mtd)
1779
+
1780
+ error_msg = Messages.get_message(error_code, independent_mtd, dependent_mtd)
1781
+ raise TeradataMlException(error_msg, error_code)
1782
+ return True
1550
1783
 
1551
1784
  @staticmethod
1552
1785
  @skip_validation()
@@ -1755,7 +1988,7 @@ class _Validators:
1755
1988
  arg2 = True if arg2 == 0 else bool(arg2)
1756
1989
 
1757
1990
  # Either both the arguments are specified or both are None.
1758
- if not(all([arg1, arg2]) or both_args_none):
1991
+ if not (all([arg1, arg2]) or both_args_none):
1759
1992
  arg_order = [err_disp_arg1_name, err_disp_arg2_name] if arg1 \
1760
1993
  else [err_disp_arg2_name, err_disp_arg1_name]
1761
1994
  raise TeradataMlException(Messages.get_message(
@@ -1792,7 +2025,7 @@ class _Validators:
1792
2025
  _Validators._validate_file_extension("/data/mapper.py",".py")
1793
2026
  _Validators._validate_file_extension("ml__demoenv_requirements_1605727131624097.txt",".txt")
1794
2027
  """
1795
- extension = extension if isinstance(extension, list) else[extension]
2028
+ extension = extension if isinstance(extension, list) else [extension]
1796
2029
  file_extension = file_path.lower().split('.')[-1]
1797
2030
  if file_extension not in extension:
1798
2031
  raise TeradataMlException(
@@ -1801,7 +2034,6 @@ class _Validators:
1801
2034
 
1802
2035
  return True
1803
2036
 
1804
-
1805
2037
  @staticmethod
1806
2038
  @skip_validation()
1807
2039
  def _validate_argument_is_not_None(arg, arg_name, additional_error="", reverse=False):
@@ -1857,7 +2089,6 @@ class _Validators:
1857
2089
  "provided {}".format(additional_error), ""))
1858
2090
  return True
1859
2091
 
1860
-
1861
2092
  @staticmethod
1862
2093
  @skip_validation()
1863
2094
  def _validate_dataframe(df, raise_error=True):
@@ -1902,7 +2133,6 @@ class _Validators:
1902
2133
  raise TeradataMlException(Messages.get_message(MessageCodes.IS_NOT_VALID_DF),
1903
2134
  MessageCodes.IS_NOT_VALID_DF)
1904
2135
 
1905
-
1906
2136
  @staticmethod
1907
2137
  @skip_validation()
1908
2138
  def _validate_column_value_length(argument_name, argument_value, allowed_length,
@@ -1951,11 +2181,10 @@ class _Validators:
1951
2181
  error_msg = Messages.get_message(
1952
2182
  error_code, operation,
1953
2183
  'Length of argument {0} ({1}) is more than the allowed length ({2}).'
1954
- .format(argument_name, len(argument_value), allowed_length))
2184
+ .format(argument_name, len(argument_value), allowed_length))
1955
2185
  raise TeradataMlException(error_msg, error_code)
1956
2186
  return True
1957
2187
 
1958
-
1959
2188
  @staticmethod
1960
2189
  @skip_validation()
1961
2190
  def _validate_list_lengths_equal(list1, arg_name1, list2, arg_name2):
@@ -2139,15 +2368,15 @@ class _Validators:
2139
2368
  err_val = key
2140
2369
  raise ValueError(
2141
2370
  Messages.get_message(MessageCodes.INVALID_DICT_ARG_VALUE, err_val,
2142
- err_str, arg_name, permitted_values))
2143
-
2371
+ err_str, arg_name, permitted_values))
2372
+
2144
2373
  # Catch ValueError raised by '_validate_function_arguments'
2145
2374
  # for empty string value.
2146
2375
  elif "TDML_2004" in str(ve):
2147
2376
  err_str = "Key" if "<dict_key>" in str(ve) else "Value"
2148
2377
  raise ValueError(
2149
2378
  Messages.get_message(MessageCodes.DICT_ARG_KEY_VALUE_EMPTY,
2150
- err_str, arg_name))
2379
+ err_str, arg_name))
2151
2380
 
2152
2381
  except TypeError as te:
2153
2382
  # Catch TypeError raised by '_validate_function_arguments' to
@@ -2163,7 +2392,7 @@ class _Validators:
2163
2392
  kv_type.__name__ for kv_type in permitted_types]
2164
2393
 
2165
2394
  raise TypeError(
2166
- Messages.get_message(MessageCodes.UNSUPPORTED_DICT_KEY_VALUE_DTYPE, err_str,
2395
+ Messages.get_message(MessageCodes.UNSUPPORTED_DICT_KEY_VALUE_DTYPE, err_str,
2167
2396
  arg_name, permitted_types))
2168
2397
 
2169
2398
  return True
@@ -2249,7 +2478,6 @@ class _Validators:
2249
2478
  MessageCodes.IMPORT_PYTHON_PACKAGE)
2250
2479
  return True
2251
2480
 
2252
-
2253
2481
  @staticmethod
2254
2482
  @skip_validation()
2255
2483
  def _validate_ipaddress(ip_address):
@@ -2278,7 +2506,6 @@ class _Validators:
2278
2506
  'of four numbers (each between 0 and 255) separated by periods'))
2279
2507
 
2280
2508
  return True
2281
-
2282
2509
 
2283
2510
  @staticmethod
2284
2511
  @skip_validation()
@@ -2303,7 +2530,95 @@ class _Validators:
2303
2530
  >>> _Validators._check_auth_token("udf")
2304
2531
  """
2305
2532
  if _InternalBuffer.get("auth_token") is None:
2306
- raise TeradataMlException(Messages.get_message(MessageCodes.AUTH_TOKEN_REQUIRED,\
2307
- func_name), MessageCodes.AUTH_TOKEN_REQUIRED)
2308
-
2309
- return True
2533
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS, \
2534
+ 'Auth Token', func_name,
2535
+ 'set_auth_token'),
2536
+ MessageCodes.SET_REQUIRED_PARAMS)
2537
+
2538
+ return True
2539
+
2540
+ @staticmethod
2541
+ def _check_required_params(arg_value, arg_name, caller_func_name, target_func_name):
2542
+ """
2543
+ DESCRIPTION:
2544
+ Check if the required argument is not None.
2545
+
2546
+ PARAMETERS:
2547
+ arg_value:
2548
+ Required Argument.
2549
+ Specifies the argument value to be
2550
+ checked for non None values.
2551
+ Types: str, float, int, bool
2552
+
2553
+ arg_name:
2554
+ Required Argument.
2555
+ Specifies the argument name.
2556
+ Types: str
2557
+
2558
+ caller_func_name:
2559
+ Required Argument.
2560
+ Specifies the function name which calls this function.
2561
+ This is required for the error message.
2562
+ Types: str
2563
+
2564
+ target_func_name:
2565
+ Required Argument.
2566
+ Specifies the function name which the user needs to call
2567
+ so that the error is fixed.
2568
+ This is required for the error message.
2569
+ Types: str
2570
+
2571
+ RAISES:
2572
+ TeradataMLException
2573
+
2574
+ RETURNS:
2575
+ True.
2576
+
2577
+ EXAMPLES:
2578
+ >>> _Validators._check_required_params("udf", "arg_name")
2579
+ """
2580
+ if arg_value is None:
2581
+ raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS, \
2582
+ arg_name, caller_func_name,
2583
+ target_func_name),
2584
+ MessageCodes.SET_REQUIRED_PARAMS)
2585
+ return True
2586
+
2587
+ @staticmethod
2588
+ def _valid_list_length(arg_value, arg_name, required_length):
2589
+ """
2590
+ DESCRIPTION:
2591
+ Check if the argument has length matching the required length.
2592
+
2593
+ PARAMETERS:
2594
+ arg_value:
2595
+ Required Argument.
2596
+ Specifies the argument value.
2597
+ Types: _ListOf
2598
+
2599
+ arg_name:
2600
+ Required Argument.
2601
+ Specifies the argument name.
2602
+ Types: str
2603
+
2604
+ required_length:
2605
+ Required Argument.
2606
+ Specifies the required list length.
2607
+ Types: int
2608
+
2609
+ RAISES:
2610
+ TeradataMlException
2611
+
2612
+ RETURNS:
2613
+ True.
2614
+
2615
+ EXAMPLES:
2616
+ >>> _Validators._valid_list_length(["udf", "udf1"], "arg_name", 2)
2617
+ """
2618
+ if (isinstance(arg_value, list) and len(arg_value) != required_length) or \
2619
+ (not isinstance(arg_value, list)):
2620
+ raise TeradataMlException(Messages.get_message(
2621
+ MessageCodes.INVALID_LIST_LENGTH).format(arg_name,
2622
+ required_length),
2623
+ MessageCodes.INVALID_LIST_LENGTH)
2624
+ return True