teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +86 -13
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +7 -12
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +16 -1
- teradataml/analytics/utils.py +15 -1
- teradataml/automl/__init__.py +290 -106
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +29 -10
- teradataml/automl/data_transformation.py +11 -0
- teradataml/automl/feature_engineering.py +64 -4
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +1 -1
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/constants.py +61 -26
- teradataml/common/messagecodes.py +2 -1
- teradataml/common/messages.py +5 -4
- teradataml/common/utils.py +255 -37
- teradataml/context/context.py +225 -87
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +13 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/dataframe/copy_to.py +37 -26
- teradataml/dataframe/data_transfer.py +61 -45
- teradataml/dataframe/dataframe.py +130 -50
- teradataml/dataframe/dataframe_utils.py +15 -2
- teradataml/dataframe/functions.py +109 -9
- teradataml/dataframe/sql.py +328 -76
- teradataml/dbutils/dbutils.py +33 -13
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/_base.py +6 -157
- teradataml/options/configure.py +4 -5
- teradataml/scriptmgmt/UserEnv.py +305 -38
- teradataml/scriptmgmt/lls_utils.py +376 -130
- teradataml/store/__init__.py +1 -1
- teradataml/table_operators/Apply.py +16 -1
- teradataml/table_operators/Script.py +20 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +2 -1
- teradataml/utils/internal_buffer.py +22 -2
- teradataml/utils/validators.py +313 -57
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +89 -14
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +107 -77
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
teradataml/utils/validators.py
CHANGED
|
@@ -28,13 +28,16 @@ def skip_validation():
|
|
|
28
28
|
def validation_func(): ...
|
|
29
29
|
|
|
30
30
|
"""
|
|
31
|
+
|
|
31
32
|
def decorator(func):
|
|
32
33
|
def wrapper(*args, **kwargs):
|
|
33
34
|
# If "skip_all" flag is set to False,
|
|
34
35
|
# skip all validation execution.
|
|
35
36
|
if not _Validators.skip_all:
|
|
36
37
|
return func(*args, **kwargs)
|
|
38
|
+
|
|
37
39
|
return wraps(func)(wrapper)
|
|
40
|
+
|
|
38
41
|
return decorator
|
|
39
42
|
|
|
40
43
|
|
|
@@ -286,7 +289,7 @@ class _Validators:
|
|
|
286
289
|
|
|
287
290
|
@staticmethod
|
|
288
291
|
@skip_validation()
|
|
289
|
-
def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False,
|
|
292
|
+
def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False,
|
|
290
293
|
case_insensitive=False):
|
|
291
294
|
"""
|
|
292
295
|
Function to check whether column names in columns are present in given dataframe or not.
|
|
@@ -346,7 +349,7 @@ class _Validators:
|
|
|
346
349
|
columns = [columns]
|
|
347
350
|
|
|
348
351
|
total_columns = []
|
|
349
|
-
|
|
352
|
+
|
|
350
353
|
for column in columns:
|
|
351
354
|
for separator in TeradataConstants.RANGE_SEPARATORS.value:
|
|
352
355
|
if column is None:
|
|
@@ -365,33 +368,33 @@ class _Validators:
|
|
|
365
368
|
# ':' specifies all columns in the table.
|
|
366
369
|
|
|
367
370
|
try:
|
|
368
|
-
# Check if
|
|
371
|
+
# Check if it's a single column with one separator. For e.g. column:A.
|
|
369
372
|
# If yes, just continue.
|
|
370
|
-
_Validators._validate_column_exists_in_dataframe(column, data._metaexpr,
|
|
373
|
+
_Validators._validate_column_exists_in_dataframe(column, data._metaexpr,
|
|
374
|
+
case_insensitive=case_insensitive)
|
|
371
375
|
continue
|
|
372
376
|
except:
|
|
373
377
|
# User has provided range value.
|
|
374
378
|
column_names = column.split(separator)
|
|
375
379
|
if (len(column_names) == 2 and
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
380
|
+
any([column_names[0].isdigit(), column_names[1].isdigit()]) and
|
|
381
|
+
not all([column_names[0].isdigit(), column_names[1].isdigit()]) and
|
|
382
|
+
not "" in column_names):
|
|
383
|
+
# Raises Exception if column range has mixed types. For e.g. "4:XYZ".
|
|
384
|
+
err_msg = Messages.get_message(MessageCodes.MIXED_TYPES_IN_COLUMN_RANGE)
|
|
385
|
+
raise ValueError(err_msg.format(column_arg))
|
|
386
|
+
|
|
383
387
|
for col in column_names:
|
|
384
388
|
if not col.isdigit() and col != "":
|
|
385
389
|
total_columns.append(col)
|
|
386
390
|
|
|
387
391
|
elif column.count(separator) > 1:
|
|
388
|
-
continue
|
|
392
|
+
continue
|
|
389
393
|
else:
|
|
390
394
|
total_columns.append(column)
|
|
391
395
|
|
|
392
396
|
return _Validators._validate_column_exists_in_dataframe(total_columns, data._metaexpr, column_arg=column_arg,
|
|
393
397
|
data_arg=data_arg, case_insensitive=case_insensitive)
|
|
394
|
-
|
|
395
398
|
|
|
396
399
|
@staticmethod
|
|
397
400
|
@skip_validation()
|
|
@@ -455,9 +458,9 @@ class _Validators:
|
|
|
455
458
|
if isinstance(columns, str):
|
|
456
459
|
columns = [columns]
|
|
457
460
|
|
|
458
|
-
# Constructing New
|
|
461
|
+
# Constructing New unquoted column names for selected columns ONLY using Parent _metaexpr
|
|
459
462
|
if case_insensitive:
|
|
460
|
-
# If lookup has to be a case
|
|
463
|
+
# If lookup has to be a case-insensitive then convert the
|
|
461
464
|
# metaexpr columns name to lower case.
|
|
462
465
|
unquoted_df_columns = [c.name.replace('"', "").lower() for c in metaexpr.c]
|
|
463
466
|
else:
|
|
@@ -468,13 +471,10 @@ class _Validators:
|
|
|
468
471
|
if column_name is None:
|
|
469
472
|
column_name = str(column_name)
|
|
470
473
|
|
|
471
|
-
if case_insensitive
|
|
472
|
-
# If lookup has to be a case insensitive then convert the
|
|
473
|
-
# column name to lower case.
|
|
474
|
-
column_name = column_name.lower()
|
|
474
|
+
case_based_column_name = column_name.lower() if case_insensitive else column_name
|
|
475
475
|
|
|
476
476
|
# If column name does not exist in metaexpr, raise the exception
|
|
477
|
-
if not
|
|
477
|
+
if not case_based_column_name.replace('"', "") in unquoted_df_columns:
|
|
478
478
|
if column_arg and data_arg:
|
|
479
479
|
raise ValueError(Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND,
|
|
480
480
|
column_name,
|
|
@@ -483,7 +483,8 @@ class _Validators:
|
|
|
483
483
|
"Table" if for_table else "DataFrame"))
|
|
484
484
|
else:
|
|
485
485
|
raise ValueError(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL,
|
|
486
|
-
column_name,
|
|
486
|
+
column_name,
|
|
487
|
+
sorted([c.name.replace('"', "") for c in metaexpr.c])))
|
|
487
488
|
|
|
488
489
|
return True
|
|
489
490
|
|
|
@@ -518,7 +519,7 @@ class _Validators:
|
|
|
518
519
|
|
|
519
520
|
@staticmethod
|
|
520
521
|
@skip_validation()
|
|
521
|
-
def _validate_function_arguments(arg_list, skip_empty_check
|
|
522
|
+
def _validate_function_arguments(arg_list, skip_empty_check=None):
|
|
522
523
|
"""
|
|
523
524
|
Method to verify that the input arguments are of valid data type except for
|
|
524
525
|
argument of DataFrameType.
|
|
@@ -543,10 +544,29 @@ class _Validators:
|
|
|
543
544
|
EXAMPLES:
|
|
544
545
|
_Validators._validate_function_arguments(arg_list)
|
|
545
546
|
"""
|
|
547
|
+
# arg_list is list of list, where each inner list can have maximum 6 elements
|
|
548
|
+
# and must have minimum (first) 4 elements:
|
|
549
|
+
# Consider following inner list.
|
|
550
|
+
# [element1, element2, element3, element4, element5, element6]
|
|
551
|
+
# Corresponds to:
|
|
552
|
+
# [<1_arg_name>, <2_arg_value>, <3_is_optional>, <4_tuple_of_accepted_types>,
|
|
553
|
+
# <5_empty_not_allowed>, <6_list_of_permitted_values>]
|
|
554
|
+
# e.g.
|
|
555
|
+
# arg_list = [["join", join, True, (str), True, concat_join_permitted_values]]
|
|
556
|
+
# 1. element1 --> Argument Name, a string. ["join" in above example.]
|
|
557
|
+
# 2. element2 --> Argument itself. [join]
|
|
558
|
+
# 3. element3 --> Specifies a flag that mentions if argument is optional or not.
|
|
559
|
+
# False means required argument and True means optional argument.
|
|
560
|
+
# 4. element4 --> Tuple of accepted types. (str) in above example.
|
|
561
|
+
# 5. element5 --> True, means validate for empty value. Error will be raised, if empty values are passed.
|
|
562
|
+
# If not specified, argument value will not be validated for empty value.
|
|
563
|
+
# 6. element6 --> A list of permitted values, an argument can accept.
|
|
564
|
+
# If not specified, argument value will not be validated against any permitted values.
|
|
565
|
+
# If a list is passed, validation will be performed for permitted values.
|
|
546
566
|
invalid_arg_names = []
|
|
547
567
|
invalid_arg_types = []
|
|
548
568
|
|
|
549
|
-
|
|
569
|
+
type_check_failed = False
|
|
550
570
|
|
|
551
571
|
for args in arg_list:
|
|
552
572
|
num_args = len(args)
|
|
@@ -569,7 +589,7 @@ class _Validators:
|
|
|
569
589
|
# Let's validate argument types.
|
|
570
590
|
#
|
|
571
591
|
# Verify datatypes for arguments which are required or the optional arguments are not None
|
|
572
|
-
if (
|
|
592
|
+
if (args[2] == True and args[1] is not None) or (args[2] == False):
|
|
573
593
|
# Validate the types of argument, if expected types are instance of tuple or type
|
|
574
594
|
dtype_list = _Validators.__getTypeAsStr(args[3])
|
|
575
595
|
|
|
@@ -586,13 +606,13 @@ class _Validators:
|
|
|
586
606
|
if not _Validators._check_isinstance(value, args[3]):
|
|
587
607
|
invalid_arg_names.append(args[0])
|
|
588
608
|
invalid_arg_types.append(valid_types_str)
|
|
589
|
-
|
|
609
|
+
type_check_failed = True
|
|
590
610
|
break
|
|
591
611
|
elif not _Validators._check_isinstance(args[1], args[3]):
|
|
592
612
|
# Argument is not of type list.
|
|
593
613
|
invalid_arg_names.append(args[0])
|
|
594
614
|
invalid_arg_types.append(valid_types_str)
|
|
595
|
-
|
|
615
|
+
type_check_failed = True
|
|
596
616
|
|
|
597
617
|
elif isinstance(args[3], tuple):
|
|
598
618
|
# Argument can accept values of multiple types, but not list.
|
|
@@ -600,30 +620,30 @@ class _Validators:
|
|
|
600
620
|
if not _Validators._check_isinstance(args[1], args[3]):
|
|
601
621
|
invalid_arg_names.append(args[0])
|
|
602
622
|
invalid_arg_types.append(valid_types_str)
|
|
603
|
-
|
|
623
|
+
type_check_failed = True
|
|
604
624
|
else:
|
|
605
625
|
# Argument can accept values of single type.
|
|
606
626
|
valid_types_str = " or ".join(dtype_list)
|
|
607
627
|
if not _Validators._check_isinstance(args[1], args[3]):
|
|
608
628
|
invalid_arg_names.append(args[0])
|
|
609
629
|
invalid_arg_types.append(valid_types_str)
|
|
610
|
-
|
|
630
|
+
type_check_failed = True
|
|
611
631
|
|
|
612
632
|
#
|
|
613
633
|
# Validate the arguments for empty value
|
|
614
634
|
#
|
|
615
|
-
if not
|
|
635
|
+
if not type_check_failed and len(args) >= 5:
|
|
616
636
|
if args[4]:
|
|
617
637
|
_Validators._validate_input_columns_not_empty(args[1], args[0], skip_empty_check)
|
|
618
638
|
|
|
619
639
|
#
|
|
620
640
|
# Validate the arguments for permitted values
|
|
621
641
|
#
|
|
622
|
-
if not
|
|
642
|
+
if not type_check_failed and len(args) >= 6:
|
|
623
643
|
if args[5] is not None:
|
|
624
644
|
_Validators._validate_permitted_values(args[1], args[5], args[0], supported_types=args[3])
|
|
625
645
|
|
|
626
|
-
if
|
|
646
|
+
if type_check_failed:
|
|
627
647
|
if len(invalid_arg_names) != 0:
|
|
628
648
|
raise TypeError(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
|
|
629
649
|
invalid_arg_names, invalid_arg_types))
|
|
@@ -632,7 +652,7 @@ class _Validators:
|
|
|
632
652
|
|
|
633
653
|
@staticmethod
|
|
634
654
|
@skip_validation()
|
|
635
|
-
def _validate_input_columns_not_empty(arg, arg_name, skip_empty_check
|
|
655
|
+
def _validate_input_columns_not_empty(arg, arg_name, skip_empty_check=None):
|
|
636
656
|
"""
|
|
637
657
|
Function to check whether argument is empty string or not.
|
|
638
658
|
|
|
@@ -664,7 +684,7 @@ class _Validators:
|
|
|
664
684
|
for col in arg:
|
|
665
685
|
if not (skip_empty_check and arg_name in skip_empty_check.keys() and col in skip_empty_check[arg_name]):
|
|
666
686
|
if isinstance(col, str):
|
|
667
|
-
if (not (col is None) and (
|
|
687
|
+
if (not (col is None)) and (len(col.strip()) == 0):
|
|
668
688
|
raise ValueError(Messages.get_message(MessageCodes.ARG_EMPTY, arg_name))
|
|
669
689
|
return True
|
|
670
690
|
|
|
@@ -704,8 +724,8 @@ class _Validators:
|
|
|
704
724
|
if args[2] == False and args[1] is None:
|
|
705
725
|
miss_args.append(args[0])
|
|
706
726
|
|
|
707
|
-
if
|
|
708
|
-
raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS,miss_args),
|
|
727
|
+
if len(miss_args) > 0:
|
|
728
|
+
raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, miss_args),
|
|
709
729
|
MessageCodes.MISSING_ARGS)
|
|
710
730
|
return True
|
|
711
731
|
|
|
@@ -813,7 +833,7 @@ class _Validators:
|
|
|
813
833
|
invalid_values.sort()
|
|
814
834
|
|
|
815
835
|
# Concatenate the message for datatypes not present in datatypes of permitted_values.
|
|
816
|
-
if len(add_types) > 0
|
|
836
|
+
if len(add_types) > 0:
|
|
817
837
|
add_types = _Validators.__getTypeAsStr(add_types)
|
|
818
838
|
add_types = " or ".join(add_types)
|
|
819
839
|
permitted_values = "{} {}".format(permitted_values, "or any values of type {}".format(add_types))
|
|
@@ -978,7 +998,7 @@ class _Validators:
|
|
|
978
998
|
return True
|
|
979
999
|
|
|
980
1000
|
is_arg_in_lower_bound, is_arg_in_upper_bound = True, True
|
|
981
|
-
lbound_msg,
|
|
1001
|
+
lbound_msg, ubound_msg = "", ""
|
|
982
1002
|
|
|
983
1003
|
# Check for lower bound.
|
|
984
1004
|
if lbound is not None:
|
|
@@ -1071,7 +1091,7 @@ class _Validators:
|
|
|
1071
1091
|
if timebucket_duration is None:
|
|
1072
1092
|
return True
|
|
1073
1093
|
|
|
1074
|
-
# Check if notation
|
|
1094
|
+
# Check if notation is formal or shorthand (beginning with a digit)
|
|
1075
1095
|
if timebucket_duration[0].isdigit():
|
|
1076
1096
|
valid_timebucket_durations = PTITableConstants.VALID_TIMEBUCKET_DURATIONS_SHORTHAND.value
|
|
1077
1097
|
pattern_to_use = PTITableConstants.PATTERN_TIMEBUCKET_DURATION_SHORT.value
|
|
@@ -1099,7 +1119,7 @@ class _Validators:
|
|
|
1099
1119
|
|
|
1100
1120
|
@staticmethod
|
|
1101
1121
|
@skip_validation()
|
|
1102
|
-
def _validate_column_type(df, col, col_arg, expected_types, raiseError
|
|
1122
|
+
def _validate_column_type(df, col, col_arg, expected_types, raiseError=True):
|
|
1103
1123
|
"""
|
|
1104
1124
|
Internal function to validate the type of an input DataFrame column against
|
|
1105
1125
|
a list of expected types.
|
|
@@ -1123,7 +1143,7 @@ class _Validators:
|
|
|
1123
1143
|
|
|
1124
1144
|
expected_types:
|
|
1125
1145
|
Required Argument.
|
|
1126
|
-
Specifies a list of
|
|
1146
|
+
Specifies a list of teradatasqlalchemy datatypes that the column is
|
|
1127
1147
|
expected to be of type.
|
|
1128
1148
|
Types: list of teradatasqlalchemy types
|
|
1129
1149
|
|
|
@@ -1134,10 +1154,10 @@ class _Validators:
|
|
|
1134
1154
|
Types: bool
|
|
1135
1155
|
|
|
1136
1156
|
RETURNS:
|
|
1137
|
-
True, when the
|
|
1157
|
+
True, when the column is of an expected type.
|
|
1138
1158
|
|
|
1139
1159
|
RAISES:
|
|
1140
|
-
TeradataMlException, when the
|
|
1160
|
+
TeradataMlException, when the column is not one of the expected types.
|
|
1141
1161
|
|
|
1142
1162
|
EXAMPLES:
|
|
1143
1163
|
_Validators._validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES)
|
|
@@ -1405,6 +1425,155 @@ class _Validators:
|
|
|
1405
1425
|
err_disp_arg2_name), MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
|
|
1406
1426
|
return True
|
|
1407
1427
|
|
|
1428
|
+
@staticmethod
|
|
1429
|
+
@skip_validation()
|
|
1430
|
+
def _validate_mutually_exclusive_argument_groups(*arg_groups, all_falsy_check=False,
|
|
1431
|
+
return_all_falsy_status=False):
|
|
1432
|
+
"""
|
|
1433
|
+
DESCRIPTION:
|
|
1434
|
+
Function to validate mutual exclusiveness of group of arguments.
|
|
1435
|
+
|
|
1436
|
+
PARAMETERS:
|
|
1437
|
+
*arg_groups:
|
|
1438
|
+
Specifies variable length argument list where each argument in list is a dictionary
|
|
1439
|
+
representing one group of arguments which should be mutually exclusive from
|
|
1440
|
+
other groups of arguments. Each dictionary contains key-value pairs for argument
|
|
1441
|
+
name and its value.
|
|
1442
|
+
|
|
1443
|
+
all_falsy_check:
|
|
1444
|
+
Optional Argument.
|
|
1445
|
+
Specifies whether to throw Teradataml Exception when all arguments in all argument
|
|
1446
|
+
groups hold Falsy/null values.
|
|
1447
|
+
Types: bool
|
|
1448
|
+
|
|
1449
|
+
return_all_falsy_status:
|
|
1450
|
+
Optional Argument.
|
|
1451
|
+
Specifies whether to return the boolean flag which states if all arguments in all argument
|
|
1452
|
+
groups hold Falsy/null values.
|
|
1453
|
+
Types: bool
|
|
1454
|
+
|
|
1455
|
+
RETURNS:
|
|
1456
|
+
* When "return_all_falsy_status" is True:
|
|
1457
|
+
* True: If all arguments in all argument groups hold Falsy/null values.
|
|
1458
|
+
* False: If all arguments in all argument groups do not hold Falsy/null values.
|
|
1459
|
+
* When "return_all_falsy_status" is False:
|
|
1460
|
+
None
|
|
1461
|
+
RAISES:
|
|
1462
|
+
TeradataMLException
|
|
1463
|
+
|
|
1464
|
+
EXAMPLES:
|
|
1465
|
+
# Example 1: When groups of arguments are not mutually exclusive.
|
|
1466
|
+
>>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": "arg1"},
|
|
1467
|
+
... {"arg2": "arg2"},
|
|
1468
|
+
... {"arg3": "arg3", "arg4": "arg4"})
|
|
1469
|
+
[Teradata][teradataml](TDML_2061) Provide either '['arg1']' argument(s) or '['arg2']' argument(s) or '['arg3', 'arg4']' argument(s).
|
|
1470
|
+
|
|
1471
|
+
# Example 2: When groups of arguments are mutually exclusive.
|
|
1472
|
+
>>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
|
|
1473
|
+
... {"arg2": ""},
|
|
1474
|
+
... {"arg3": "arg3", "arg4": "arg4"})
|
|
1475
|
+
|
|
1476
|
+
# Example 3: When all groups of arguments hold falsy values
|
|
1477
|
+
# and "all_falsy_check" is set to True.
|
|
1478
|
+
>>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
|
|
1479
|
+
... {"arg2": None},
|
|
1480
|
+
... {"arg3": None, "arg4": None},
|
|
1481
|
+
... all_falsy_check=True)
|
|
1482
|
+
[Teradata][teradataml](TDML_2061) Provide either '['arg1']' argument(s) or '['arg2']' argument(s) or '['arg3', 'arg4']' argument(s).
|
|
1483
|
+
|
|
1484
|
+
# Example 4: When all groups of arguments hold falsy values
|
|
1485
|
+
# and "all_falsy_check" is set to False.
|
|
1486
|
+
>>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
|
|
1487
|
+
... {"arg2": None},
|
|
1488
|
+
... {"arg3": None, "arg4": None})
|
|
1489
|
+
|
|
1490
|
+
# Example 5: When all groups of arguments hold falsy values
|
|
1491
|
+
# and "all_falsy_check" is set to False and
|
|
1492
|
+
# "return_all_falsy_status" is set to True.
|
|
1493
|
+
>>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
|
|
1494
|
+
... {"arg2": None},
|
|
1495
|
+
... {"arg3": None, "arg4": None},
|
|
1496
|
+
... return_all_falsy_status=True)
|
|
1497
|
+
True
|
|
1498
|
+
"""
|
|
1499
|
+
all_groups_falsy = True
|
|
1500
|
+
mutually_exclusive_groups = True
|
|
1501
|
+
non_falsy_groups = []
|
|
1502
|
+
for arg_grp in arg_groups:
|
|
1503
|
+
# TODO: Handling of falsy values can be done in more appropriate way by
|
|
1504
|
+
# differentiating None/empty string/empty list.
|
|
1505
|
+
is_group_falsy = not any(value for value in arg_grp.values())
|
|
1506
|
+
if not is_group_falsy:
|
|
1507
|
+
non_falsy_groups.append(arg_grp)
|
|
1508
|
+
|
|
1509
|
+
# Current group is having non-falsy values and already traversed
|
|
1510
|
+
# group(s) also has(have) non-falsy values. So set "mutually_exclusive_groups" to False.
|
|
1511
|
+
if not all_groups_falsy:
|
|
1512
|
+
mutually_exclusive_groups = False
|
|
1513
|
+
|
|
1514
|
+
all_groups_falsy = all_groups_falsy and is_group_falsy
|
|
1515
|
+
|
|
1516
|
+
# Raise error if any one of the below-mentioned conditions is True:
|
|
1517
|
+
# More than one group has non-falsy values.
|
|
1518
|
+
# All groups have all falsy values and "all_falsy_check" is True.
|
|
1519
|
+
if not mutually_exclusive_groups or (all_falsy_check and all_groups_falsy):
|
|
1520
|
+
if not non_falsy_groups:
|
|
1521
|
+
non_falsy_groups = [str(list(arg_grp.keys())) for arg_grp in arg_groups]
|
|
1522
|
+
else:
|
|
1523
|
+
non_falsy_groups = [str(list(non_falsy_group.keys())) for non_falsy_group in non_falsy_groups]
|
|
1524
|
+
error_msg = Messages.get_message(
|
|
1525
|
+
MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, str(non_falsy_groups[0]),
|
|
1526
|
+
"' argument(s) or \'".join(non_falsy_groups[1:]))
|
|
1527
|
+
|
|
1528
|
+
raise TeradataMlException(error_msg, MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
|
|
1529
|
+
|
|
1530
|
+
if return_all_falsy_status:
|
|
1531
|
+
return all_groups_falsy
|
|
1532
|
+
|
|
1533
|
+
@staticmethod
|
|
1534
|
+
@skip_validation()
|
|
1535
|
+
def _validate_mutually_inclusive_n_arguments(**kwargs):
|
|
1536
|
+
"""
|
|
1537
|
+
DESCRIPTION:
|
|
1538
|
+
Function to validate mutual inclusiveness of group of arguments.
|
|
1539
|
+
|
|
1540
|
+
PARAMETERS:
|
|
1541
|
+
**kwargs:
|
|
1542
|
+
Specifies variable number of keyword arguments which are to be
|
|
1543
|
+
validated for mutual inclusiveness.
|
|
1544
|
+
|
|
1545
|
+
RETURNS:
|
|
1546
|
+
True, if arguments are mutually inclusive.
|
|
1547
|
+
|
|
1548
|
+
RAISES:
|
|
1549
|
+
TeradataMLException
|
|
1550
|
+
|
|
1551
|
+
EXAMPLES:
|
|
1552
|
+
Example 1: When all arguments have non-None values.
|
|
1553
|
+
>>> _Validators._validate_mutually_inclusive_n_arguments(arg1="arg1", arg2="arg2",
|
|
1554
|
+
... arg3="arg3", arg4="arg4")
|
|
1555
|
+
True
|
|
1556
|
+
|
|
1557
|
+
Example 2: When one of the arguments is empty string.
|
|
1558
|
+
>>> _Validators._validate_mutually_inclusive_n_arguments(arg1="arg1", arg2="arg2",
|
|
1559
|
+
... arg3="arg3", arg4="")
|
|
1560
|
+
TeradataMlException
|
|
1561
|
+
|
|
1562
|
+
Example 3: When one of the arguments is None.
|
|
1563
|
+
>>> _Validators._validate_mutually_inclusive_n_arguments(arg1="arg1", arg2=None,
|
|
1564
|
+
... arg3="arg3", arg4="arg4")
|
|
1565
|
+
TeradataMlException
|
|
1566
|
+
"""
|
|
1567
|
+
# TODO: Handling of falsy values can be done in more appropriate way by
|
|
1568
|
+
# differentiating None/empty string/empty list.
|
|
1569
|
+
if all(arg_value for arg, arg_value in kwargs.items()):
|
|
1570
|
+
return True
|
|
1571
|
+
else:
|
|
1572
|
+
arg_list = list(kwargs.keys())
|
|
1573
|
+
message = Messages.get_message(MessageCodes.MUST_PASS_ARGUMENT,
|
|
1574
|
+
arg_list[0], " and ".join(arg_list[1:]))
|
|
1575
|
+
raise TeradataMlException(message, MessageCodes.MUST_PASS_ARGUMENT)
|
|
1576
|
+
|
|
1408
1577
|
@staticmethod
|
|
1409
1578
|
@skip_validation()
|
|
1410
1579
|
def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True,
|
|
@@ -1557,6 +1726,60 @@ class _Validators:
|
|
|
1557
1726
|
raise TeradataMlException(error_msg.replace("(not None)", "as '{}'".format(msg_arg_value)),
|
|
1558
1727
|
MessageCodes.DEPENDENT_ARGUMENT)
|
|
1559
1728
|
return True
|
|
1729
|
+
|
|
1730
|
+
@staticmethod
|
|
1731
|
+
@skip_validation()
|
|
1732
|
+
def _validate_dependent_method(dependent_mtd, independent_mtd, independent_mtd_calls):
|
|
1733
|
+
"""
|
|
1734
|
+
DESCRIPTION:
|
|
1735
|
+
Function validates if an independent method has been called before a dependent method.
|
|
1736
|
+
Raises an error if the independent method is not called before the dependent method is called,
|
|
1737
|
+
otherwise, returns True.
|
|
1738
|
+
|
|
1739
|
+
PARAMETERS:
|
|
1740
|
+
dependent_mtd:
|
|
1741
|
+
Required Argument.
|
|
1742
|
+
Specifies the name of dependent method.
|
|
1743
|
+
Types: String
|
|
1744
|
+
|
|
1745
|
+
independent_mtd:
|
|
1746
|
+
Required Argument.
|
|
1747
|
+
Specifies the name of independent method.
|
|
1748
|
+
Types: String or List of Strings
|
|
1749
|
+
|
|
1750
|
+
independent_mtd_calls:
|
|
1751
|
+
Required Argument.
|
|
1752
|
+
Specifies the flag to check whether independent method is called or not.
|
|
1753
|
+
Types: bool or List of bool
|
|
1754
|
+
|
|
1755
|
+
RETURNS:
|
|
1756
|
+
True, when the independent method is called before the dependent method.
|
|
1757
|
+
|
|
1758
|
+
RAISES:
|
|
1759
|
+
TeradataMlException, when independent method is not called before the
|
|
1760
|
+
dependent method.
|
|
1761
|
+
|
|
1762
|
+
EXAMPLES:
|
|
1763
|
+
_Validators._validate_dependent_method("dependent_method", "independent_method", False)
|
|
1764
|
+
_Validators._validate_dependent_method("dependent_method", "independent_method", True)
|
|
1765
|
+
_Validators._validate_dependent_method("dependent_method", ["independent_method1", "independent_method2"], [False, False])
|
|
1766
|
+
"""
|
|
1767
|
+
# Check if all independent method calls are False
|
|
1768
|
+
independent_mtd_calls = [independent_mtd_calls] \
|
|
1769
|
+
if not isinstance(independent_mtd_calls, list) else independent_mtd_calls
|
|
1770
|
+
all_false = all(not value for value in independent_mtd_calls)
|
|
1771
|
+
|
|
1772
|
+
# Check if any of the independent method is called before dependent method
|
|
1773
|
+
if dependent_mtd and all_false:
|
|
1774
|
+
error_code = MessageCodes.DEPENDENT_METHOD
|
|
1775
|
+
|
|
1776
|
+
if isinstance(independent_mtd, str):
|
|
1777
|
+
independent_mtd = [independent_mtd]
|
|
1778
|
+
independent_mtd = ' or '.join(f"'{item}'" for item in independent_mtd)
|
|
1779
|
+
|
|
1780
|
+
error_msg = Messages.get_message(error_code, independent_mtd, dependent_mtd)
|
|
1781
|
+
raise TeradataMlException(error_msg, error_code)
|
|
1782
|
+
return True
|
|
1560
1783
|
|
|
1561
1784
|
@staticmethod
|
|
1562
1785
|
@skip_validation()
|
|
@@ -1765,7 +1988,7 @@ class _Validators:
|
|
|
1765
1988
|
arg2 = True if arg2 == 0 else bool(arg2)
|
|
1766
1989
|
|
|
1767
1990
|
# Either both the arguments are specified or both are None.
|
|
1768
|
-
if not(all([arg1, arg2]) or both_args_none):
|
|
1991
|
+
if not (all([arg1, arg2]) or both_args_none):
|
|
1769
1992
|
arg_order = [err_disp_arg1_name, err_disp_arg2_name] if arg1 \
|
|
1770
1993
|
else [err_disp_arg2_name, err_disp_arg1_name]
|
|
1771
1994
|
raise TeradataMlException(Messages.get_message(
|
|
@@ -1802,7 +2025,7 @@ class _Validators:
|
|
|
1802
2025
|
_Validators._validate_file_extension("/data/mapper.py",".py")
|
|
1803
2026
|
_Validators._validate_file_extension("ml__demoenv_requirements_1605727131624097.txt",".txt")
|
|
1804
2027
|
"""
|
|
1805
|
-
extension = extension if isinstance(extension, list) else[extension]
|
|
2028
|
+
extension = extension if isinstance(extension, list) else [extension]
|
|
1806
2029
|
file_extension = file_path.lower().split('.')[-1]
|
|
1807
2030
|
if file_extension not in extension:
|
|
1808
2031
|
raise TeradataMlException(
|
|
@@ -1811,7 +2034,6 @@ class _Validators:
|
|
|
1811
2034
|
|
|
1812
2035
|
return True
|
|
1813
2036
|
|
|
1814
|
-
|
|
1815
2037
|
@staticmethod
|
|
1816
2038
|
@skip_validation()
|
|
1817
2039
|
def _validate_argument_is_not_None(arg, arg_name, additional_error="", reverse=False):
|
|
@@ -1867,7 +2089,6 @@ class _Validators:
|
|
|
1867
2089
|
"provided {}".format(additional_error), ""))
|
|
1868
2090
|
return True
|
|
1869
2091
|
|
|
1870
|
-
|
|
1871
2092
|
@staticmethod
|
|
1872
2093
|
@skip_validation()
|
|
1873
2094
|
def _validate_dataframe(df, raise_error=True):
|
|
@@ -1912,7 +2133,6 @@ class _Validators:
|
|
|
1912
2133
|
raise TeradataMlException(Messages.get_message(MessageCodes.IS_NOT_VALID_DF),
|
|
1913
2134
|
MessageCodes.IS_NOT_VALID_DF)
|
|
1914
2135
|
|
|
1915
|
-
|
|
1916
2136
|
@staticmethod
|
|
1917
2137
|
@skip_validation()
|
|
1918
2138
|
def _validate_column_value_length(argument_name, argument_value, allowed_length,
|
|
@@ -1961,11 +2181,10 @@ class _Validators:
|
|
|
1961
2181
|
error_msg = Messages.get_message(
|
|
1962
2182
|
error_code, operation,
|
|
1963
2183
|
'Length of argument {0} ({1}) is more than the allowed length ({2}).'
|
|
1964
|
-
|
|
2184
|
+
.format(argument_name, len(argument_value), allowed_length))
|
|
1965
2185
|
raise TeradataMlException(error_msg, error_code)
|
|
1966
2186
|
return True
|
|
1967
2187
|
|
|
1968
|
-
|
|
1969
2188
|
@staticmethod
|
|
1970
2189
|
@skip_validation()
|
|
1971
2190
|
def _validate_list_lengths_equal(list1, arg_name1, list2, arg_name2):
|
|
@@ -2149,15 +2368,15 @@ class _Validators:
|
|
|
2149
2368
|
err_val = key
|
|
2150
2369
|
raise ValueError(
|
|
2151
2370
|
Messages.get_message(MessageCodes.INVALID_DICT_ARG_VALUE, err_val,
|
|
2152
|
-
|
|
2153
|
-
|
|
2371
|
+
err_str, arg_name, permitted_values))
|
|
2372
|
+
|
|
2154
2373
|
# Catch ValueError raised by '_validate_function_arguments'
|
|
2155
2374
|
# for empty string value.
|
|
2156
2375
|
elif "TDML_2004" in str(ve):
|
|
2157
2376
|
err_str = "Key" if "<dict_key>" in str(ve) else "Value"
|
|
2158
2377
|
raise ValueError(
|
|
2159
2378
|
Messages.get_message(MessageCodes.DICT_ARG_KEY_VALUE_EMPTY,
|
|
2160
|
-
|
|
2379
|
+
err_str, arg_name))
|
|
2161
2380
|
|
|
2162
2381
|
except TypeError as te:
|
|
2163
2382
|
# Catch TypeError raised by '_validate_function_arguments' to
|
|
@@ -2173,7 +2392,7 @@ class _Validators:
|
|
|
2173
2392
|
kv_type.__name__ for kv_type in permitted_types]
|
|
2174
2393
|
|
|
2175
2394
|
raise TypeError(
|
|
2176
|
-
Messages.get_message(MessageCodes.UNSUPPORTED_DICT_KEY_VALUE_DTYPE, err_str,
|
|
2395
|
+
Messages.get_message(MessageCodes.UNSUPPORTED_DICT_KEY_VALUE_DTYPE, err_str,
|
|
2177
2396
|
arg_name, permitted_types))
|
|
2178
2397
|
|
|
2179
2398
|
return True
|
|
@@ -2259,7 +2478,6 @@ class _Validators:
|
|
|
2259
2478
|
MessageCodes.IMPORT_PYTHON_PACKAGE)
|
|
2260
2479
|
return True
|
|
2261
2480
|
|
|
2262
|
-
|
|
2263
2481
|
@staticmethod
|
|
2264
2482
|
@skip_validation()
|
|
2265
2483
|
def _validate_ipaddress(ip_address):
|
|
@@ -2288,7 +2506,6 @@ class _Validators:
|
|
|
2288
2506
|
'of four numbers (each between 0 and 255) separated by periods'))
|
|
2289
2507
|
|
|
2290
2508
|
return True
|
|
2291
|
-
|
|
2292
2509
|
|
|
2293
2510
|
@staticmethod
|
|
2294
2511
|
@skip_validation()
|
|
@@ -2313,11 +2530,11 @@ class _Validators:
|
|
|
2313
2530
|
>>> _Validators._check_auth_token("udf")
|
|
2314
2531
|
"""
|
|
2315
2532
|
if _InternalBuffer.get("auth_token") is None:
|
|
2316
|
-
raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS
|
|
2533
|
+
raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS, \
|
|
2317
2534
|
'Auth Token', func_name,
|
|
2318
2535
|
'set_auth_token'),
|
|
2319
2536
|
MessageCodes.SET_REQUIRED_PARAMS)
|
|
2320
|
-
|
|
2537
|
+
|
|
2321
2538
|
return True
|
|
2322
2539
|
|
|
2323
2540
|
@staticmethod
|
|
@@ -2366,3 +2583,42 @@ class _Validators:
|
|
|
2366
2583
|
target_func_name),
|
|
2367
2584
|
MessageCodes.SET_REQUIRED_PARAMS)
|
|
2368
2585
|
return True
|
|
2586
|
+
|
|
2587
|
+
@staticmethod
|
|
2588
|
+
def _valid_list_length(arg_value, arg_name, required_length):
|
|
2589
|
+
"""
|
|
2590
|
+
DESCRIPTION:
|
|
2591
|
+
Check if the argument has length matching the required length.
|
|
2592
|
+
|
|
2593
|
+
PARAMETERS:
|
|
2594
|
+
arg_value:
|
|
2595
|
+
Required Argument.
|
|
2596
|
+
Specifies the argument value.
|
|
2597
|
+
Types: _ListOf
|
|
2598
|
+
|
|
2599
|
+
arg_name:
|
|
2600
|
+
Required Argument.
|
|
2601
|
+
Specifies the argument name.
|
|
2602
|
+
Types: str
|
|
2603
|
+
|
|
2604
|
+
required_length:
|
|
2605
|
+
Required Argument.
|
|
2606
|
+
Specifies the required list length.
|
|
2607
|
+
Types: int
|
|
2608
|
+
|
|
2609
|
+
RAISES:
|
|
2610
|
+
TeradataMlException
|
|
2611
|
+
|
|
2612
|
+
RETURNS:
|
|
2613
|
+
True.
|
|
2614
|
+
|
|
2615
|
+
EXAMPLES:
|
|
2616
|
+
>>> _Validators._valid_list_length(["udf", "udf1"], "arg_name", 2)
|
|
2617
|
+
"""
|
|
2618
|
+
if (isinstance(arg_value, list) and len(arg_value) != required_length) or \
|
|
2619
|
+
(not isinstance(arg_value, list)):
|
|
2620
|
+
raise TeradataMlException(Messages.get_message(
|
|
2621
|
+
MessageCodes.INVALID_LIST_LENGTH).format(arg_name,
|
|
2622
|
+
required_length),
|
|
2623
|
+
MessageCodes.INVALID_LIST_LENGTH)
|
|
2624
|
+
return True
|