teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic; see the release advisory for details.

Files changed (96):
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
@@ -1,16 +1,19 @@
1
+ import datetime
1
2
  import enum
2
3
  import numbers
3
4
  import os
4
5
  import pandas as pd
5
6
  from pathlib import Path
6
7
  import re
7
- from teradataml.common.constants import TeradataConstants, PTITableConstants, PythonTypes
8
+ from sqlalchemy import func
9
+ from teradataml.common.constants import TeradataConstants, PTITableConstants, PythonTypes, DataFrameTypes
8
10
  from teradataml.common.exceptions import TeradataMlException
9
11
  from teradataml.common.messages import MessageCodes, Messages
10
12
  from teradataml.utils.dtypes import _Dtypes, _DtypesMappers, _ListOf, _TupleOf
11
13
  from teradataml.options.configure import configure
12
14
  from teradataml.dataframe.sql_interfaces import ColumnExpression
13
15
  from functools import wraps, reduce
16
+ from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIMESTAMP)
14
17
 
15
18
  from teradataml.utils.internal_buffer import _InternalBuffer
16
19
 
@@ -1351,27 +1354,36 @@ class _Validators:
1351
1354
  """
1352
1355
  DESCRIPTION:
1353
1356
  Function to validate whether the path specified is a file and if it exists.
1354
-
1357
+ Supports both single file path (str) and list of file paths.
1355
1358
  PARAMETERS:
1356
1359
  file_path:
1357
1360
  Required Argument.
1358
- Specifies the path of the file.
1359
- Types: str
1360
-
1361
+ Specifies the path of the file or list of file paths.
1362
+ Types: str or list of str
1361
1363
  RETURNS:
1362
- True, if the path is a file and it exists.
1363
-
1364
+ True, if all paths are files and exist.
1364
1365
  RAISES:
1365
1366
  TeradataMLException
1366
-
1367
1367
  EXAMPLES:
1368
- _Validators._validate_file_exists("/data/mapper.py")
1369
- """
1370
- if not Path(file_path).exists() or not os.path.isfile(file_path):
1368
+ Example 1: When a single file path is specified.
1369
+ >>> _Validators._validate_file_exists("/data/mapper.py")
1370
+ Example 2: When a list of file paths is specified.
1371
+ >>> _Validators._validate_file_exists(["/data/mapper.py", "/data/other.py"])
1372
+ """
1373
+ file_paths = [file_path] if isinstance(file_path, str) else file_path
1374
+ invalid_paths = []
1375
+
1376
+ # Validate if each file path exists and is a file.
1377
+ for fp in file_paths:
1378
+ if not Path(fp).exists() or not os.path.isfile(fp):
1379
+ invalid_paths.append(fp)
1380
+
1381
+ # If any of the file paths is invalid, raise an exception.
1382
+ if invalid_paths:
1371
1383
  raise TeradataMlException(
1372
- Messages.get_message(MessageCodes.INPUT_FILE_NOT_FOUND).format(file_path),
1384
+ Messages.get_message(MessageCodes.INPUT_FILE_NOT_FOUND).format(", ".join(invalid_paths)),
1373
1385
  MessageCodes.INPUT_FILE_NOT_FOUND)
1374
-
1386
+
1375
1387
  return True
1376
1388
 
1377
1389
  @staticmethod
@@ -1432,6 +1444,7 @@ class _Validators:
1432
1444
  @staticmethod
1433
1445
  @skip_validation()
1434
1446
  def _validate_mutually_exclusive_argument_groups(*arg_groups, all_falsy_check=False,
1447
+ empty_check=False,
1435
1448
  return_all_falsy_status=False):
1436
1449
  """
1437
1450
  DESCRIPTION:
@@ -1448,12 +1461,21 @@ class _Validators:
1448
1461
  Optional Argument.
1449
1462
  Specifies whether to throw Teradataml Exception when all arguments in all argument
1450
1463
  groups hold Falsy/null values.
1464
+ Default Value: False
1465
+ Types: bool
1466
+
1467
+ empty_check:
1468
+ Optional Argument.
1469
+ Specifies whether to treat empty values like empty string and empty list as None or not.
1470
+ When set to True, empty string and empty list are treated as None.
1471
+ Default Value: False
1451
1472
  Types: bool
1452
1473
 
1453
1474
  return_all_falsy_status:
1454
1475
  Optional Argument.
1455
1476
  Specifies whether to return the boolean flag which states if all arguments in all argument
1456
1477
  groups hold Falsy/null values.
1478
+ Default Value: False
1457
1479
  Types: bool
1458
1480
 
1459
1481
  RETURNS:
@@ -1474,7 +1496,7 @@ class _Validators:
1474
1496
 
1475
1497
  # Example 2: When groups of arguments are mutually exclusive.
1476
1498
  >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": None},
1477
- ... {"arg2": ""},
1499
+ ... {"arg2": None},
1478
1500
  ... {"arg3": "arg3", "arg4": "arg4"})
1479
1501
 
1480
1502
  # Example 3: When all groups of arguments hold falsy values
@@ -1499,14 +1521,43 @@ class _Validators:
1499
1521
  ... {"arg3": None, "arg4": None},
1500
1522
  ... return_all_falsy_status=True)
1501
1523
  True
1524
+
1525
+ # Example 6: When groups of arguments are mutually exclusive
1526
+ # considering empty list and empty string as falsy values.
1527
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": ""},
1528
+ ... {"arg2": []},
1529
+ ... {"arg3": "arg3", "arg4": "arg4"},
1530
+ ... empty_check=True)
1531
+
1532
+ # Example 7: When all groups of arguments hold falsy values
1533
+ # considering empty list and empty string as falsy values
1534
+ # and "all_falsy_check" is set to True.
1535
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": ""},
1536
+ ... {"arg2": []},
1537
+ ... {"arg3": [], "arg4": None},
1538
+ ... {"arg5": "", "arg6": None},
1539
+ ... empty_check=True,
1540
+ ... all_falsy_check=True)
1541
+ TeradataMlException: [Teradata][teradataml](TDML_2061) Provide either '['arg1']' argument(s) or '['arg2']' argument(s) or '['arg3', 'arg4']' argument(s) or '['arg5', 'arg6']' argument(s).
1542
+
1543
+ # Example 8: When groups of arguments are not mutually exclusive
1544
+ # considering empty list and empty string as valid values.
1545
+ >>> _Validators._validate_mutually_exclusive_argument_groups({"arg1": ""},
1546
+ ... {"arg2": []},
1547
+ ... {"arg3": "arg3", "arg4": "arg4"})
1548
+ TeradataMlException: [Teradata][teradataml](TDML_2061) Provide either '['arg1']' argument(s) or '['arg2']' argument(s) or '['arg3', 'arg4']' argument(s).
1549
+
1502
1550
  """
1503
1551
  all_groups_falsy = True
1504
1552
  mutually_exclusive_groups = True
1505
1553
  non_falsy_groups = []
1506
1554
  for arg_grp in arg_groups:
1507
- # TODO: Handling of falsy values can be done in more appropriate way by
1508
- # differentiating None/empty string/empty list.
1509
- is_group_falsy = not any(value for value in arg_grp.values())
1555
+ if empty_check:
1556
+ # Treat empty string and empty list as falsy Value.
1557
+ is_group_falsy = not any(arg_grp.values())
1558
+ else:
1559
+ # Treat only None as falsy Value.
1560
+ is_group_falsy = not any(value is not None for value in arg_grp.values())
1510
1561
  if not is_group_falsy:
1511
1562
  non_falsy_groups.append(arg_grp)
1512
1563
 
@@ -1731,6 +1782,41 @@ class _Validators:
1731
1782
  MessageCodes.DEPENDENT_ARGUMENT)
1732
1783
  return True
1733
1784
 
1785
+ @staticmethod
1786
+ @skip_validation()
1787
+ def _validate_unsupported_argument(arg, arg_name):
1788
+ """
1789
+ DESCRIPTION:
1790
+ Validation to reject unsupported arguments.
1791
+
1792
+ PARAMETERS:
1793
+ arg:
1794
+ Required Argument.
1795
+ Specifies the value passed for the argument that is unsupported.
1796
+ Types: any
1797
+
1798
+ arg_name:
1799
+ Required Argument.
1800
+ Specifies the name of the argument to be printed in error message.
1801
+ Types: str
1802
+
1803
+ RAISES:
1804
+ ValueError, If arg is not None, indicating an unsupported argument was used.
1805
+
1806
+ RETURNS:
1807
+ True, If the argument is not provided (i.e., None), allowing execution to proceed.
1808
+
1809
+ EXAMPLES:
1810
+ _Validators._validate_unsupported_argument(kwargs.get("task_type", None), "task_type")
1811
+ _Validators._validate_unsupported_argument(kwargs.get("is_fraud", None), "is_fraud")
1812
+ _Validators._validate_unsupported_argument(kwargs.get("is_churn", None), "is_churn")
1813
+ """
1814
+ error_code = MessageCodes.UNSUPPORTED_ARGUMENT
1815
+ error_msg = Messages.get_message(error_code, arg_name, arg_name)
1816
+ if arg is not None:
1817
+ raise TeradataMlException(error_msg, error_code)
1818
+ return True
1819
+
1734
1820
  @staticmethod
1735
1821
  @skip_validation()
1736
1822
  def _validate_dependent_method(dependent_mtd, independent_mtd, independent_mtd_calls):
@@ -2626,3 +2712,594 @@ class _Validators:
2626
2712
  required_length),
2627
2713
  MessageCodes.INVALID_LIST_LENGTH)
2628
2714
  return True
2715
+
2716
+ @staticmethod
2717
+ @skip_validation()
2718
+ def _validate_non_empty_list_or_valid_selection(arg_list, arg_name):
2719
+ """
2720
+ DESCRIPTION:
2721
+ Validation to ensure that the given list-type argument is not empty or contains only invalid entries
2722
+ like None, '', 'None', etc.
2723
+
2724
+ PARAMETERS:
2725
+ arg_list:
2726
+ Required Argument.
2727
+ Specifies the list or iterable for validation.
2728
+ Types: list
2729
+
2730
+ arg_name:
2731
+ Required Argument.
2732
+ Specifies the argument name.
2733
+ Types: str
2734
+
2735
+ RAISES:
2736
+ ValueError - If the list is None, empty, or contains only invalid values.
2737
+
2738
+ RETURNS:
2739
+ True - If validation passes (non-empty and has valid entries).
2740
+
2741
+ EXAMPLES:
2742
+ >>> _Validators._validate_non_empty_list_or_valid_selection(self.model_list, "List of models")
2743
+ """
2744
+
2745
+ error_code = MessageCodes.LIST_SELECT_NONE_OR_EMPTY
2746
+ if not arg_list or all(x in [None, "None", ""] for x in arg_list):
2747
+ raise TeradataMlException(Messages.get_message(error_code).format(arg_name), error_code)
2748
+ return True
2749
+
2750
+ @staticmethod
2751
+ def _validate_temporal_table_type(df_type, api_type='method', api_name='as_of'):
2752
+ """
2753
+ DESCRIPTION:
2754
+ Function to validate temporal table type.
2755
+
2756
+ PARAMETERS:
2757
+ df_type:
2758
+ Required Argument.
2759
+ Specifies the type of temporal table.
2760
+ Types: str
2761
+
2762
+ api_type:
2763
+ Required Argument.
2764
+ Specifies the type of API.
2765
+ Types: str
2766
+
2767
+ api_name:
2768
+ Required Argument.
2769
+ Specifies the name of API.
2770
+ Types: str
2771
+
2772
+ RETURNS:
2773
+ None.
2774
+
2775
+ RAISES:
2776
+ TeradataMLException
2777
+
2778
+ EXAMPLES:
2779
+ >>> _Validators._validate_temporal_table_type('method', 'as_of')
2780
+ """
2781
+ if df_type not in (DataFrameTypes.VALID_TIME_VIEW.name,
2782
+ DataFrameTypes.TRANSACTION_TIME_VIEW.name,
2783
+ DataFrameTypes.BI_TEMPORAL_VIEW.name,
2784
+ DataFrameTypes.BI_TEMPORAL.name,
2785
+ DataFrameTypes.TRANSACTION_TIME.name,
2786
+ DataFrameTypes.VALID_TIME.name,
2787
+ DataFrameTypes.VALID_TIME_VOLATILE_TABLE.name,
2788
+ DataFrameTypes.TRANSACTION_TIME_VOLATILE_TABLE.name,
2789
+ DataFrameTypes.BI_TEMPORAL_VOLATILE_TABLE.name
2790
+ ):
2791
+ raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_USAGE,
2792
+ api_type,
2793
+ "'{}'".format(api_name),
2794
+ "when underlying table or view is temporal type"),
2795
+ MessageCodes.INVALID_USAGE)
2796
+
2797
+ @staticmethod
2798
+ def _validate_as_of_arguments(df_type, argument_name='valid_time'):
2799
+ """
2800
+ DESCRIPTION:
2801
+ Function to validate arguments passed for method as_of.
2802
+ One can not pass argument 'valid_time' for a transaction time table
2803
+ One can not pass argument 'transaction_time' for a valid time table.
2804
+ Both the validations are done in this validator.
2805
+
2806
+ PARAMETERS:
2807
+ df_type:
2808
+ Required Argument.
2809
+ Specifies the type of temporal table.
2810
+ Types: str
2811
+
2812
+ argument_name:
2813
+ Optional Argument.
2814
+ Specifies the name of the argument.
2815
+ Default Value: 'valid_time'
2816
+ Types: str
2817
+
2818
+ RETURNS:
2819
+ None.
2820
+
2821
+ RAISES:
2822
+ TeradataMLException
2823
+
2824
+ EXAMPLES:
2825
+ >>> _Validators._validate_temporal_table_type('method', 'as_of')
2826
+ """
2827
+ valid_types = (
2828
+ DataFrameTypes.TRANSACTION_TIME_VIEW.name,
2829
+ DataFrameTypes.TRANSACTION_TIME.name,
2830
+ DataFrameTypes.TRANSACTION_TIME_VOLATILE_TABLE.name
2831
+ )
2832
+ table_type = 'transaction time dimension'
2833
+
2834
+ if argument_name == 'valid_time':
2835
+ valid_types = (DataFrameTypes.VALID_TIME_VIEW.name,
2836
+ DataFrameTypes.VALID_TIME.name,
2837
+ DataFrameTypes.VALID_TIME_VOLATILE_TABLE.name
2838
+ )
2839
+ table_type = 'valid time dimension'
2840
+
2841
+ bi_temporal_types = (
2842
+ DataFrameTypes.BI_TEMPORAL_VIEW.name,
2843
+ DataFrameTypes.BI_TEMPORAL.name,
2844
+ DataFrameTypes.BI_TEMPORAL_VOLATILE_TABLE.name
2845
+ )
2846
+
2847
+ # Raise error only if it is not a bitemporal table.
2848
+ if (df_type not in bi_temporal_types) and (df_type not in valid_types):
2849
+ raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_USAGE,
2850
+ 'argument',
2851
+ "'{}'".format(argument_name),
2852
+ "when underlying table or view is in {}".format(table_type)),
2853
+ MessageCodes.INVALID_USAGE)
2854
+
2855
+ @staticmethod
2856
+ def _validate_period_column_type(column_type):
2857
+ """
2858
+ DESCRIPTION:
2859
+ Function to validate the type of a period column.
2860
+
2861
+ PARAMETERS:
2862
+ column_type:
2863
+ Required Argument.
2864
+ Specifies the type of the column to be validated.
2865
+ Types: Any
2866
+
2867
+ RETURNS:
2868
+ None
2869
+
2870
+ RAISES:
2871
+ TeradataMlException
2872
+
2873
+ EXAMPLES:
2874
+ _Validators._validate_period_column_type(PERIOD_DATE)
2875
+ """
2876
+ if not isinstance(column_type, (PERIOD_DATE, PERIOD_TIMESTAMP)):
2877
+ raise TeradataMlException(
2878
+ Messages.get_message(
2879
+ MessageCodes.INVALID_COLUMN_TYPE
2880
+ ).format(
2881
+ "period column",
2882
+ type(column_type).__name__,
2883
+ "PERIOD_DATE or PERIOD_TIMESTAMP"
2884
+ ),
2885
+ MessageCodes.INVALID_COLUMN_TYPE
2886
+ )
2887
+ @staticmethod
2888
+ @skip_validation()
2889
+ def _validate_features_not_in_efs_dataset(df,
2890
+ feature_names,
2891
+ action):
2892
+ """
2893
+ DESCRIPTION:
2894
+ Function to validate whether the feature names provided
2895
+ are not present in the EFS dataset.
2896
+
2897
+ PARAMETERS:
2898
+ df:
2899
+ Required Argument.
2900
+ Specifies the EFS dataset dataframe.
2901
+ Types: teradataml.dataframe.dataframe.DataFrame
2902
+
2903
+ feature_names:
2904
+ Required Argument.
2905
+ Specifies the feature names to be validated.
2906
+ Types: str or list of str
2907
+
2908
+ action:
2909
+ Required Argument.
2910
+ Specifies the action to be performed.
2911
+ Permitted Values: 'archived', 'deleted'
2912
+ Types: str
2913
+
2914
+ RETURNS:
2915
+ True, if the feature names are not present in the EFS dataset.
2916
+
2917
+ RAISES:
2918
+ TeradataMlException
2919
+
2920
+ EXAMPLES:
2921
+ >>> _Validators._validate_features_not_in_efs_dataset(df, ["feature1", "feature2"], "delete")
2922
+ """
2923
+ if isinstance(feature_names, str):
2924
+ feature_names = [feature_names]
2925
+
2926
+ invalid_df = df[(df['feature_name'].isin(feature_names))]
2927
+
2928
+ if invalid_df.shape[0] > 0:
2929
+ names = set()
2930
+ datasets = set()
2931
+ for feature in invalid_df.itertuples():
2932
+ names.add(feature.feature_name)
2933
+ datasets.add(feature.dataset_id)
2934
+ datasets_str = ", ".join(f"'{dataset}'" for dataset in datasets)
2935
+ name_str = ", ".join(f"'{name}'" for name in names)
2936
+
2937
+ error_code = MessageCodes.EFS_FEATURE_IN_DATASET
2938
+ error_msg = Messages.get_message(error_code,
2939
+ name_str,
2940
+ datasets_str,
2941
+ action)
2942
+ raise TeradataMlException(error_msg, error_code)
2943
+
2944
+ return True
2945
+
2946
+ @staticmethod
2947
+ def _validate_dataset_ids_not_in_efs(df, ids, data_domain, repo):
2948
+ """
2949
+ DESCRIPTION:
2950
+ Function to validate whether the dataset ids provided
2951
+ are not present in the EFS.
2952
+
2953
+ PARAMETERS:
2954
+ df:
2955
+ Required Argument.
2956
+ Specifies the EFS dataset dataframe.
2957
+ Types: teradataml.dataframe.dataframe.DataFrame
2958
+
2959
+ ids:
2960
+ Required Argument.
2961
+ Specifies the dataset ids to be validated.
2962
+ Types: str or list of str
2963
+
2964
+ data_domain:
2965
+ Required Argument.
2966
+ Specifies the data domain for the feature process.
2967
+ Types: str
2968
+
2969
+ repo:
2970
+ Required Argument.
2971
+ Specifies the repository to be used for validation.
2972
+ Types: str
2973
+
2974
+ RETURNS:
2975
+ True, if the dataset ids are not present in the EFS.
2976
+
2977
+ RAISES:
2978
+ TeradataMlException
2979
+
2980
+ EXAMPLES:
2981
+ >>> _Validators._validate_features_not_in_efs_dataset(df, ["12-dek-3e3-dek"], "d1")
2982
+ """
2983
+ from teradataml.common.utils import UtilFuncs
2984
+ id_list_flag = False if isinstance(ids, str) else True
2985
+ list_ids = UtilFuncs._as_list(ids)
2986
+
2987
+ # Check if the dataset ids are present in the domain.
2988
+ df = df[(df['id'].isin(list_ids)) &
2989
+ (df['data_domain'] == data_domain)]
2990
+ matched_ids = [i.id for i in df.select("id").itertuples()]
2991
+ # Get the list of dataset ids that are not present in the domain.
2992
+ missing_ids = [i for i in list_ids if i not in matched_ids]
2993
+
2994
+ # If there are ids that are not present in the domain,
2995
+ # raise an exception with appropriate error message.
2996
+ if len(missing_ids) > 0:
2997
+ if id_list_flag:
2998
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
2999
+ error_msg = Messages.get_message(msg_code, "Dataset", "id(s): {}".format(missing_ids),
3000
+ data_domain) + " Use DatasetCatalog.list_datasets() to list valid dataset ids."
3001
+ else:
3002
+ # Check if the dataset id is present in any other domain.
3003
+ from teradataml.store.feature_store.utils import _FSUtils
3004
+ res = _FSUtils._get_data_domains(repo, ids, 'dataset')
3005
+ if res:
3006
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
3007
+ error_msg = Messages.get_message(msg_code, "Dataset", "id '{}'".format(ids),
3008
+ data_domain, res)
3009
+ else:
3010
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
3011
+ error_msg = Messages.get_message(msg_code, "Dataset", "id '{}'".format(ids),
3012
+ data_domain)
3013
+ raise TeradataMlException(error_msg, msg_code)
3014
+
3015
+ return True
3016
+
3017
+
3018
+ @staticmethod
3019
+ @skip_validation()
3020
+ def _validate_duplicate_objects(objects, type_="features", arg_name='features'):
3021
+ """
3022
+ DESCRIPTION:
3023
+ Function to validate that there are no duplicate objects in the provided list.
3024
+
3025
+ PARAMETERS:
3026
+ objects:
3027
+ Required Argument.
3028
+ Specifies the objects to validate for duplicates.
3029
+ Types: list or tuple
3030
+
3031
+ type_:
3032
+ Optional Argument.
3033
+ Specifies the type of objects being validated.
3034
+ Default Value: "features"
3035
+ Types: str
3036
+
3037
+ arg_name:
3038
+ Optional Argument.
3039
+ Specifies the name of the argument being validated.
3040
+ Default Value: "features"
3041
+ Types: str
3042
+
3043
+ RAISES:
3044
+ TeradataMlException
3045
+
3046
+ Returns:
3047
+ bool
3048
+
3049
+ EXAMPLES:
3050
+ >>> load_examples_data('dataframe', 'sales')
3051
+ >>> df = DataFrame('sales')
3052
+
3053
+ # Example 1: Validate duplicate features in the list.
3054
+ >>> feature1 = Feature("Jan", df.Jan)
3055
+ >>> _Validators._validate_duplicate_objects([feature1, 'Jan', 'Feb'])
3056
+
3057
+ # Example 2: Validate duplicate datetime.datetime objects in tuple.
3058
+ >>> t = datetime.datetime(2025, 1, 1, 0, 0, 1)
3059
+ >>> td = datetime.date(2025, 1, 1)
3060
+ >>> _Validators._validate_duplicate_objects((td, td.strftime('%Y-%m-%d %H:%M:%S'), t))
3061
+ """
3062
+ from teradataml.common.utils import UtilFuncs
3063
+ from teradataml.store.feature_store.models import Feature
3064
+ seen = set()
3065
+ duplicates = set()
3066
+
3067
+ if isinstance(objects, (list, tuple)):
3068
+ for obj in objects:
3069
+ if isinstance(obj, Feature):
3070
+ name = obj.column_name
3071
+ elif isinstance(obj, (datetime.datetime, datetime.date)):
3072
+ name = obj.strftime('%Y-%m-%d %H:%M:%S')
3073
+ else:
3074
+ name = obj
3075
+
3076
+ if name in seen:
3077
+ duplicates.add(name)
3078
+ else:
3079
+ seen.add(name)
3080
+
3081
+ if len(duplicates) > 0:
3082
+ msg = "{} in {}".format(
3083
+ ", ".join(["'{}'".format(duplicate) for duplicate in sorted(duplicates)]),
3084
+ "'{}' argument".format(arg_name)
3085
+ )
3086
+ raise TeradataMlException(
3087
+ Messages.get_message(
3088
+ MessageCodes.DF_DUPLICATE_VALUES,
3089
+ type_,
3090
+ msg
3091
+ ),
3092
+ MessageCodes.DF_DUPLICATE_VALUES)
3093
+
3094
+ return True
3095
+
3096
+ @staticmethod
3097
+ @skip_validation()
3098
+ def _validate_duplicate_values(df, columns, arg_name, columns_arg='entity column(s)'):
3099
+ """
3100
+ DESCRIPTION:
3101
+ Function to validate that there are no duplicate records in the DataFrame
3102
+ based on the specified columns.
3103
+
3104
+ PARAMETERS:
3105
+ df:
3106
+ Required Argument.
3107
+ Specifies the DataFrame to validate for duplicates.
3108
+ Types: teradataml DataFrame
3109
+
3110
+ columns:
3111
+ Required Argument.
3112
+ Specifies the columns to check for duplicates.
3113
+ Types: str or list of str
3114
+
3115
+ arg_name:
3116
+ Required Argument.
3117
+ Specifies the name of the argument being validated.
3118
+ Types: str
3119
+
3120
+ columns_arg:
3121
+ Optional Argument.
3122
+ Specifies the name of the columns argument.
3123
+ Default Value: 'entity column(s)'
3124
+ Types: str
3125
+
3126
+ RAISES:
3127
+ TeradataMlException
3128
+
3129
+ Returns:
3130
+ bool
3131
+
3132
+ EXAMPLES:
3133
+ >>> _Validators._validate_duplicate_records(df, ['col1', 'col2'], 'columns')
3134
+ """
3135
+ columns = [columns] if isinstance(columns, str) else columns
3136
+ df_ = df.groupby(columns).assign(total_rows_=func.count('*'))
3137
+ duplicate_recs = df_[df_.total_rows_ > 1].shape[0]
3138
+
3139
+ if duplicate_recs > 0:
3140
+ msg = "in {} {} provided in argument {}".format(
3141
+ columns_arg,
3142
+ ", ".join(["'{}'".format(col) for col in columns]),
3143
+ "'{}'".format(arg_name)
3144
+ )
3145
+ raise TeradataMlException(
3146
+ Messages.get_message(
3147
+ MessageCodes.DF_DUPLICATE_VALUES,
3148
+ "values in {}".format(columns_arg),
3149
+ msg
3150
+ ),
3151
+ MessageCodes.DF_DUPLICATE_VALUES)
3152
+
3153
+ return True
3154
+
3155
+ @staticmethod
3156
+ @skip_validation()
3157
+ def _validate_null_values(df,
3158
+ columns,
3159
+ arg_name,
3160
+ columns_arg='entity column(s)',
3161
+ operation='ingesting the features'):
3162
+ """
3163
+ DESCRIPTION:
3164
+ Function to validate that there are no null values in the specified columns
3165
+ of the DataFrame.
3166
+
3167
+ PARAMETERS:
3168
+ df:
3169
+ Required Argument.
3170
+ Specifies the DataFrame to validate for null values.
3171
+ Types: teradataml DataFrame
3172
+
3173
+ columns:
3174
+ Required Argument.
3175
+ Specifies the columns to check for null values.
3176
+ Types: str or list of str
3177
+
3178
+ arg_name:
3179
+ Required Argument.
3180
+ Specifies the name of the argument being validated.
3181
+ Types: str
3182
+
3183
+ columns_arg:
3184
+ Optional Argument.
3185
+ Specifies the name of the columns argument.
3186
+ Default Value: 'entity column(s)'
3187
+ Types: str
3188
+
3189
+ operation:
3190
+ Optional Argument.
3191
+ Specifies the operation being performed.
3192
+ Default Value: 'ingesting the features'
3193
+ Types: str
3194
+
3195
+ RAISES:
3196
+ TeradataMlException
3197
+
3198
+ Returns:
3199
+ bool
3200
+
3201
+ EXAMPLES:
3202
+ >>> _Validators._validate_null_values(df, ['col1', 'col2'], 'columns')
3203
+ """
3204
+ columns = [columns] if isinstance(columns, str) else columns
3205
+ col_expr = (df[columns[0]] == None)
3206
+ for column in columns[1:]:
3207
+ col_expr = col_expr | (df[column] == None)
3208
+
3209
+ null_count = df[col_expr].shape[0]
3210
+
3211
+ if null_count > 0:
3212
+ msg = "in {} {} provided in argument {}".format(
3213
+ columns_arg,
3214
+ ", ".join(["'{}'".format(col) for col in columns]),
3215
+ "'{}'".format(arg_name)
3216
+ )
3217
+ raise TeradataMlException(
3218
+ Messages.get_message(
3219
+ MessageCodes.DF_NULL_VALUES,
3220
+ columns_arg,
3221
+ operation,
3222
+ msg
3223
+ ),
3224
+ MessageCodes.DF_NULL_VALUES)
3225
+
3226
+ return True
3227
+
3228
+ @staticmethod
3229
+ @skip_validation()
3230
+ def _validate_archived_features(features_to_validate, archived_features, msg=""):
3231
+ """
3232
+ DESCRIPTION:
3233
+ Function to validate that the features are already archived or not.
3234
+ If archived, it raises an exception.
3235
+
3236
+ PARAMETERS:
3237
+ features_to_validate:
3238
+ Required Argument.
3239
+ Specifies the features to be validated for archiving.
3240
+ Types: list of str
3241
+
3242
+ archived_features:
3243
+ Required Argument.
3244
+ Specifies the set of already archived features.
3245
+ Types: set of str
3246
+
3247
+ msg:
3248
+ Optional Argument.
3249
+ Specifies the additional message to be displayed in the exception.
3250
+ Default Value: ""
3251
+ Types: str
3252
+
3253
+ RAISES:
3254
+ TeradataMlException
3255
+
3256
+ Returns:
3257
+ bool
3258
+
3259
+ EXAMPLES:
3260
+ >>> _Validators._validate_archived_features(['feature1', 'feature2'], {'feature1'})
3261
+ """
3262
+ features_to_validate = [features_to_validate] if isinstance(features_to_validate, str) \
3263
+ else features_to_validate
3264
+ archived_features = [f for f in features_to_validate if f in archived_features]
3265
+
3266
+ if archived_features:
3267
+ raise TeradataMlException(
3268
+ Messages.get_message(MessageCodes.FEATURES_ARCHIVED,
3269
+ ", ".join("'{}'".format(f) for f in archived_features),
3270
+ msg
3271
+ ),
3272
+ MessageCodes.FEATURES_ARCHIVED)
3273
+
3274
+ return True
3275
+
3276
+ @staticmethod
3277
+ @skip_validation()
3278
+ def _validate_any_argument_passed(args_dict):
3279
+ """
3280
+ DESCRIPTION:
3281
+ Check if any value in the argument dictionary is not None.
3282
+ If all values are None, raise an exception.
3283
+
3284
+ PARAMETERS:
3285
+ args_dict:
3286
+ Required Argument.
3287
+ Specifies the argument to value dictionary to check.
3288
+ Types: dict
3289
+
3290
+ RAISES:
3291
+ TeradataMlException
3292
+
3293
+ Returns:
3294
+ bool
3295
+
3296
+ EXAMPLES:
3297
+ >>> _Validators._validate_any_argument_passed({"arg1": None, "arg2": "abc"})
3298
+ """
3299
+ if all(value is None for value in args_dict.values()):
3300
+ msg_code = MessageCodes.EITHER_ANY_ARGUMENT
3301
+ argument_description = " or ".join(["'{}'".format(key) for key in args_dict.keys()])
3302
+ error_msg = Messages.get_message(msg_code, argument_description)
3303
+ raise TeradataMlException(error_msg, msg_code)
3304
+
3305
+ return True