teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic.

Files changed (151)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +193 -1
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +25 -18
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  8. teradataml/analytics/sqle/__init__.py +20 -2
  9. teradataml/analytics/utils.py +15 -1
  10. teradataml/analytics/valib.py +18 -4
  11. teradataml/automl/__init__.py +341 -112
  12. teradataml/automl/autodataprep/__init__.py +471 -0
  13. teradataml/automl/data_preparation.py +84 -42
  14. teradataml/automl/data_transformation.py +69 -33
  15. teradataml/automl/feature_engineering.py +76 -9
  16. teradataml/automl/feature_exploration.py +639 -25
  17. teradataml/automl/model_training.py +35 -14
  18. teradataml/clients/auth_client.py +2 -2
  19. teradataml/common/__init__.py +1 -2
  20. teradataml/common/constants.py +122 -63
  21. teradataml/common/messagecodes.py +14 -3
  22. teradataml/common/messages.py +8 -4
  23. teradataml/common/sqlbundle.py +40 -10
  24. teradataml/common/utils.py +366 -74
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +348 -86
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/apriori_example.json +22 -0
  29. teradataml/data/byom_example.json +11 -0
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  37. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  38. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  40. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  41. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  42. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  43. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  45. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  49. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  51. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  52. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  53. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  54. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  55. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  57. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  58. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  59. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  60. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  61. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  62. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  63. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  64. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  65. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  67. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  68. teradataml/data/hnsw_alter_data.csv +5 -0
  69. teradataml/data/hnsw_data.csv +10 -0
  70. teradataml/data/jsons/byom/h2opredict.json +1 -1
  71. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  72. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  73. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  74. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  75. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  76. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  77. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  78. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  79. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  80. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  81. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  82. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  83. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  84. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  85. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  86. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  87. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  88. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  89. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  90. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  91. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  92. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  93. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  94. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
  95. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
  96. teradataml/data/ner_dict.csv +8 -0
  97. teradataml/data/ner_input_eng.csv +7 -0
  98. teradataml/data/ner_rule.csv +5 -0
  99. teradataml/data/pos_input.csv +40 -0
  100. teradataml/data/tdnerextractor_example.json +14 -0
  101. teradataml/data/teradataml_example.json +21 -0
  102. teradataml/data/textmorph_example.json +5 -0
  103. teradataml/data/to_num_data.csv +4 -0
  104. teradataml/data/tochar_data.csv +5 -0
  105. teradataml/data/trans_dense.csv +16 -0
  106. teradataml/data/trans_sparse.csv +55 -0
  107. teradataml/data/vectordistance_example.json +1 -1
  108. teradataml/dataframe/copy_to.py +45 -29
  109. teradataml/dataframe/data_transfer.py +72 -46
  110. teradataml/dataframe/dataframe.py +642 -166
  111. teradataml/dataframe/dataframe_utils.py +167 -22
  112. teradataml/dataframe/functions.py +135 -20
  113. teradataml/dataframe/setop.py +11 -6
  114. teradataml/dataframe/sql.py +330 -78
  115. teradataml/dbutils/dbutils.py +556 -140
  116. teradataml/dbutils/filemgr.py +14 -10
  117. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  118. teradataml/lib/aed_0_1.dll +0 -0
  119. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
  120. teradataml/opensource/_class.py +141 -17
  121. teradataml/opensource/{constants.py → _constants.py} +7 -3
  122. teradataml/opensource/_lightgbm.py +52 -53
  123. teradataml/opensource/_sklearn.py +1008 -0
  124. teradataml/opensource/_wrapper_utils.py +5 -5
  125. teradataml/options/__init__.py +47 -15
  126. teradataml/options/configure.py +103 -26
  127. teradataml/options/display.py +13 -2
  128. teradataml/plot/axis.py +47 -8
  129. teradataml/plot/figure.py +33 -0
  130. teradataml/plot/plot.py +63 -13
  131. teradataml/scriptmgmt/UserEnv.py +307 -40
  132. teradataml/scriptmgmt/lls_utils.py +428 -145
  133. teradataml/store/__init__.py +2 -3
  134. teradataml/store/feature_store/feature_store.py +102 -7
  135. teradataml/table_operators/Apply.py +48 -19
  136. teradataml/table_operators/Script.py +23 -2
  137. teradataml/table_operators/TableOperator.py +3 -1
  138. teradataml/table_operators/table_operator_util.py +58 -9
  139. teradataml/utils/dtypes.py +49 -1
  140. teradataml/utils/internal_buffer.py +38 -0
  141. teradataml/utils/validators.py +377 -62
  142. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
  143. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
  144. teradataml/data/SQL_Fundamentals.pdf +0 -0
  145. teradataml/libaed_0_1.dylib +0 -0
  146. teradataml/libaed_0_1.so +0 -0
  147. teradataml/opensource/sklearn/__init__.py +0 -0
  148. teradataml/store/vector_store/__init__.py +0 -1586
  149. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  150. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  151. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -30,7 +30,7 @@ from teradataml import ColumnExpression
30
30
  from teradataml.dataframe.dataframe import DataFrame
31
31
  from teradataml.utils.utils import execute_sql
32
32
  from teradataml.utils.validators import _Validators
33
- from teradataml import ROC, BLOB
33
+ from teradataml import ROC, BLOB, VARCHAR
34
34
  from teradataml.utils.dtypes import _Dtypes
35
35
  from teradataml.common.utils import UtilFuncs
36
36
  from teradataml import TeradataMlException
@@ -94,6 +94,9 @@ class AutoML:
94
94
  the processes by passing the JSON file path in case of custom run. It also
95
95
  supports early stopping of model training based on stopping metrics,
96
96
  maximum running time and maximum models to be trained.
97
+ Note:
98
+ * configure.temp_object_type="VT" follows sequential execution.
99
+
97
100
 
98
101
  PARAMETERS:
99
102
  task_type:
@@ -185,8 +188,17 @@ class AutoML:
185
188
  results are persisted in a table; otherwise,
186
189
  results are garbage collected at the end of the
187
190
  session.
191
+ Note:
192
+ * User is responsible for cleanup of the persisted tables. List of persisted tables
193
+ in current session can be viewed using get_persisted_tables() method.
188
194
  Default Value: False
189
195
  Types: bool
196
+
197
+ seed:
198
+ Optional Argument.
199
+ Specifies the random seed for reproducibility.
200
+ Default Value: 42
201
+ Types: int
190
202
 
191
203
  RETURNS:
192
204
  Instance of AutoML.
@@ -417,9 +429,11 @@ class AutoML:
417
429
 
418
430
  volatile = kwargs.get('volatile', False)
419
431
  persist = kwargs.get('persist', False)
432
+ seed = kwargs.get('seed', 42)
420
433
 
421
434
  arg_info_matrix.append(["volatile", volatile, True, (bool)])
422
435
  arg_info_matrix.append(["persist", persist, True, (bool)])
436
+ arg_info_matrix.append(["seed", seed, True, (int)])
423
437
 
424
438
  # Validate argument types
425
439
  _Validators._validate_function_arguments(arg_info_matrix)
@@ -465,8 +479,13 @@ class AutoML:
465
479
  self._is_fit_called = False
466
480
  self._is_load_model_called = False
467
481
  self.kwargs = kwargs
468
- self.table_name_mapping={}
469
-
482
+ self.table_name_mapping = {}
483
+ # Stores the table name of all intermediate datas
484
+ self._intermediate_table_names={}
485
+ self._auto_dataprep = False
486
+ self._phases = None
487
+ self._progressbar_prefix = "AutoML Running:"
488
+
470
489
  @collect_queryband(queryband="AutoML_fit")
471
490
  def fit(self,
472
491
  data,
@@ -517,7 +536,7 @@ class AutoML:
517
536
 
518
537
  # Validate argument types
519
538
  _Validators._validate_function_arguments(arg_info_fit_matrix)
520
-
539
+
521
540
  # Initializing class variables
522
541
  self.data = data
523
542
  self.target_column = target_column
@@ -591,15 +610,25 @@ class AutoML:
591
610
  clf = task_cls(self.data, self.target_column, self.custom_data)
592
611
 
593
612
  self.model_info, self.leader_board, self.target_count, self.target_label, \
594
- self.data_transformation_params, self.table_name_mapping = getattr(clf, cls_method)(
595
- model_list = self.model_list,
596
- auto = self.auto,
597
- verbose = self.verbose,
598
- max_runtime_secs = self.max_runtime_secs,
599
- stopping_metric = self.stopping_metric,
600
- stopping_tolerance = self.stopping_tolerance,
601
- max_models = self.max_models,
602
- **self.kwargs)
613
+ self.data_transformation_params, self._intermediate_table_names = getattr(clf, cls_method)(
614
+ model_list = self.model_list,
615
+ auto = self.auto,
616
+ verbose = self.verbose,
617
+ max_runtime_secs = self.max_runtime_secs,
618
+ stopping_metric = self.stopping_metric,
619
+ stopping_tolerance = self.stopping_tolerance,
620
+ max_models = self.max_models,
621
+ auto_dataprep = self._auto_dataprep,
622
+ automl_phases = self._phases,
623
+ progress_prefix = self._progressbar_prefix,
624
+ **self.kwargs)
625
+
626
+
627
+ # table_name_mapping stores the table name of all intermediate datas (lasso, rfe, pca)
628
+ # used for training models
629
+ keys_to_extract = ['lasso_train', 'rfe_train', 'pca_train']
630
+ self.table_name_mapping = {key: self._intermediate_table_names[key] for key in keys_to_extract
631
+ if key in self._intermediate_table_names}
603
632
 
604
633
  # Model Evaluation Phase
605
634
  self.m_evaluator = _ModelEvaluator(self.model_info,
@@ -669,13 +698,9 @@ class AutoML:
669
698
  >>> prediction = automl_obj.predict(admissions_test, rank=3, use_loaded_models=True)
670
699
  >>> prediction
671
700
  """
672
- # Checking if fit or load model is called before predict, If not raise error
673
- if not self._is_fit_called and not self._is_load_model_called:
674
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
675
- "'predict' method", \
676
- "'fit' or 'load' method must be called before" \
677
- " running predict.")
678
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
701
+ # Raise error if fit is not called before predict
702
+ _Validators._validate_dependent_method("predict", ["fit", "load"],
703
+ [self._is_fit_called, self._is_load_model_called])
679
704
 
680
705
  # Appending predict arguments to list for validation.
681
706
  arg_info_pred_matrix = []
@@ -758,11 +783,12 @@ class AutoML:
758
783
  if self.target_column_ind:
759
784
  prediction_column = 'prediction' if 'prediction' in pred.result.columns else 'Prediction'
760
785
  probability_column = 'prob_1'
786
+ pred_target_count = pred.result.drop_duplicate(self.target_column).size
761
787
  # Displaying confusion matrix and ROC-AUC for classification problem
762
788
  if self.is_classification_type():
763
789
  print_data = lambda data: print(data) if _is_terminal() else display(data)
764
790
  # Displaying ROC-AUC for binary classification
765
- if self.target_count == 2:
791
+ if self.target_count == 2 and pred_target_count == 2:
766
792
  fit_params = {
767
793
  "probability_column" : probability_column,
768
794
  "observation_column" : self.target_column,
@@ -850,13 +876,10 @@ class AutoML:
850
876
  >>> evaluation = automl_obj.evaluate(admissions_test, rank=3, use_loaded_models=True)
851
877
  >>> evaluation
852
878
  """
853
- if not self._is_fit_called and not self._is_load_model_called:
854
- # raise ValueError("fit() method must be called before evaluating.")
855
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
856
- "'evaluate' method", \
857
- "'fit' or 'load' method must be called before" \
858
- " running evaluate.")
859
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
879
+ # Raising exception if fit or load model is not called before evaluate
880
+ _Validators._validate_dependent_method("evaluate", ["fit", "load"],
881
+ [self._is_fit_called, self._is_load_model_called])
882
+
860
883
  # Appending evaluate arguments to list for validation.
861
884
  arg_info_pred_matrix = []
862
885
  arg_info_pred_matrix.append(["data", data, False, (DataFrame), True])
@@ -886,8 +909,8 @@ class AutoML:
886
909
  # as it is required for evaluation.
887
910
  if self.target_column not in data.columns:
888
911
  raise TeradataMlException(
889
- Messages.get_message(MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE).format(self.target_column),
890
- MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE)
912
+ Messages.get_message(MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE).format(self.target_column),
913
+ MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE)
891
914
 
892
915
  # Checking if data is already transformed before or not
893
916
  data_node_id = data._nodeid
@@ -1005,13 +1028,9 @@ class AutoML:
1005
1028
  # Generate leaderboard using leaderboard() method on "automl_obj".
1006
1029
  >>> automl_obj.leaderboard()
1007
1030
  """
1008
- if not self._is_fit_called:
1009
- # raise ValueError("fit() method must be called before generating leaderboard.")
1010
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1011
- "'leaderboard' method", \
1012
- "'fit' method must be called before" \
1013
- " generating leaderboard.")
1014
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1031
+ # Raise error if fit is not called before leaderboard
1032
+ _Validators._validate_dependent_method("leaderboard", "fit", self._is_fit_called)
1033
+
1015
1034
  return self.leader_board
1016
1035
 
1017
1036
  @collect_queryband(queryband="AutoML_leader")
@@ -1034,13 +1053,9 @@ class AutoML:
1034
1053
  # Display best performing model using leader() method on "automl_obj".
1035
1054
  >>> automl_obj.leader()
1036
1055
  """
1037
- if not self._is_fit_called:
1038
- # raise ValueError("fit() method must be called before generating leader.")
1039
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1040
- "'leader' method", \
1041
- "'fit' method must be called before" \
1042
- " generating leader.")
1043
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1056
+ # Raise error if fit is not called before leader
1057
+ _Validators._validate_dependent_method("leader", "fit", self._is_fit_called)
1058
+
1044
1059
  record = self.leader_board
1045
1060
  if not _is_terminal():
1046
1061
  display(record[record['RANK'] == 1])
@@ -1113,13 +1128,9 @@ class AutoML:
1113
1128
  >>> automl_obj.model_hyperparameters(rank=1)
1114
1129
  """
1115
1130
 
1116
- if not self._is_fit_called and not self._is_load_model_called:
1117
- # raise ValueError("fit() or load() method must be called before getting hyperparameters.")
1118
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1119
- "'model_hyperparameters' method",
1120
- "No models available to get hyperparameters. " \
1121
- "Run 'fit()' or 'load()' methods to get models.")
1122
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1131
+ # Raise error if fit or load model is not called before model_hyperparameters
1132
+ _Validators._validate_dependent_method("model_hyperparameters", ["fit", "load"],
1133
+ [self._is_fit_called, self._is_load_model_called])
1123
1134
 
1124
1135
  arg_info_matrix = []
1125
1136
  arg_info_matrix.append(["rank", rank, True, (int), True])
@@ -1234,6 +1245,8 @@ class AutoML:
1234
1245
  pca.n_components_ = load_pca_info['n_components']
1235
1246
  pca.noise_variance_ = load_pca_info['noise_variance']
1236
1247
  pca.singular_values_ = np.array(load_pca_info['singular_values'])
1248
+ pca.feature_names_in_ = data_params['pca_fit_columns']
1249
+ pca.n_features_in_ = len(data_params['pca_fit_columns'])
1237
1250
 
1238
1251
  data_params['pca_fit_instance'] = pca
1239
1252
 
@@ -1256,28 +1269,18 @@ class AutoML:
1256
1269
  start_rank, end_rank = ranks.start, ranks.stop
1257
1270
 
1258
1271
  # Check if both parts are non-negative integers
1259
- if not (start_rank > 0 and end_rank > 0):
1260
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1261
- "'deploy' method", \
1262
- "Provided start and end rank in 'ranks' "\
1263
- "must be positive non-zero integers.")
1264
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1272
+ _Validators._validate_positive_int(start_rank, "ranks(start)")
1273
+ _Validators._validate_positive_int(end_rank, "ranks(end)")
1265
1274
 
1266
1275
  # Check if start_rank is less than or equal to end_rank
1267
1276
  if start_rank > end_rank:
1268
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1269
- "'deploy' method", \
1270
- "Provided start rank in 'ranks' must be less than"\
1271
- " or equal to end rank in 'ranks'.")
1272
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1277
+ err = "Provided start rank in 'ranks' must be less than or equal to end rank in 'ranks'."
1278
+ self._raise_error("deploy", err)
1273
1279
 
1274
1280
  # check end rank is less than or equal to total models
1275
1281
  if end_rank > self.leader_board.RANK.max():
1276
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1277
- "'deploy' method", \
1278
- "Provided end rank in 'ranks' must be less than"\
1279
- " or equal to total models available.")
1280
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1282
+ err = "Provided end rank in 'ranks' must be less than or equal to total models available."
1283
+ self._raise_error("deploy", err)
1281
1284
 
1282
1285
  return start_rank, end_rank
1283
1286
 
@@ -1342,12 +1345,7 @@ class AutoML:
1342
1345
  >>> obj.deploy("model_table", ranks=range(2,6))
1343
1346
  """
1344
1347
  # raise Error if fit is not called
1345
- if not self._is_fit_called:
1346
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1347
- "'deploy' method", \
1348
- "'fit' method must be called before" \
1349
- " 'deploy'.")
1350
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1348
+ _Validators._validate_dependent_method("deploy", "fit", self._is_fit_called)
1351
1349
 
1352
1350
  # Appending arguments to list for validation
1353
1351
  arg_info_matrix = []
@@ -1442,7 +1440,8 @@ class AutoML:
1442
1440
  # Saving data transformation parameters to the specified table
1443
1441
  sv_models = pd.concat([sv_models, df], ignore_index=True, sort=False)
1444
1442
 
1445
- copy_to_sql(df = sv_models, table_name=table_name, if_exists='replace', types={'DATA_PARAMS':BLOB})
1443
+ copy_to_sql(df = sv_models, table_name=table_name, if_exists='replace', types={'DATA_PARAMS':BLOB,
1444
+ 'PARAMETERS':VARCHAR(length=32000, charset='UNICODE')})
1446
1445
 
1447
1446
  print('Model Deployment Completed Successfully.')
1448
1447
 
@@ -1793,6 +1792,185 @@ class AutoML:
1793
1792
 
1794
1793
  db_drop_table(table_name)
1795
1794
 
1795
+ @collect_queryband(queryband="AutoML_get_persisted_tables")
1796
+ def get_persisted_tables(self):
1797
+ """
1798
+ DESCRIPTION:
1799
+ Get the list of the tables that are persisted in the database.
1800
+ Note:
1801
+ * User is responsible for keeping track of the persistent tables
1802
+ and cleanup of the same if required.
1803
+
1804
+ PARAMETERS:
1805
+ None
1806
+
1807
+ RETURNS:
1808
+ Dictionary, containing the list of table names that mapped to the stage
1809
+ at which it was generated.
1810
+
1811
+ RAISES:
1812
+ TeradataMlException.
1813
+
1814
+ EXAMPLES:
1815
+ # Create an instance of the AutoML called "obj"
1816
+ # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
1817
+ # 'persist' argument must be set to True in the AutoML object.
1818
+ >>> obj = AutoML(verbose=2, max_models=10, persist=True)
1819
+
1820
+ # Load and fit the data.
1821
+ >>> load_example_data("teradataml", "titanic")
1822
+ >>> titanic_data = DataFrame("titanic")
1823
+ >>> obj.fit(data = titanic_data, target_column = titanic.survived)
1824
+
1825
+ # Get the list of tables that are persisted in the database.
1826
+ >>> obj.get_persisted_tables()
1827
+ """
1828
+ # Check if fit is called
1829
+ _Validators._validate_dependent_method("get_persisted_tables", "fit", self._is_fit_called)
1830
+
1831
+ # check if persist is passed as argument and is set to True
1832
+ persist_val = True if self.kwargs.get('persist', False) else None
1833
+
1834
+ _Validators._validate_dependent_argument("get_persisted_tables", True,
1835
+ "persist", persist_val,
1836
+ msg_arg_value='True')
1837
+
1838
+ # result table names
1839
+ return self._intermediate_table_names
1840
+
1841
+ def _raise_error(self, method_name, error_msg):
1842
+ """
1843
+ DESCRIPTION:
1844
+ Internal Function raises an error message when a method
1845
+ fails to execute.
1846
+
1847
+ PARAMETERS:
1848
+ method_name:
1849
+ Required Argument.
1850
+ Specifies the method name that failed to execute.
1851
+ Types: str
1852
+
1853
+ error_msg:
1854
+ Required Argument.
1855
+ Specifies the error message to be displayed.
1856
+ Types: str
1857
+
1858
+ RAISES:
1859
+ TeradataMlException.
1860
+
1861
+ EXAMPLES:
1862
+ >>> self._raise_error("fit", "fit() method must be called before 'deploy'.")
1863
+ """
1864
+ err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1865
+ f'{method_name} method',
1866
+ error_msg)
1867
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1868
+
1869
+ @staticmethod
1870
+ def visualize(**kwargs):
1871
+ """
1872
+ DESCRIPTION:
1873
+ Function visualizes the data using various plots such as heatmap,
1874
+ pair plot, histogram, univariate plot, count plot, box plot, and target distribution.
1875
+
1876
+ PARAMETERS:
1877
+ data:
1878
+ Required Argument.
1879
+ Specifies the input teradataml DataFrame for plotting.
1880
+ Types: teradataml Dataframe
1881
+
1882
+ target_column:
1883
+ Required Argument.
1884
+ Specifies the name of the target column in "data".
1885
+ Note:
1886
+ * "target_column" must be of numeric type.
1887
+ Types: str
1888
+
1889
+ plot_type:
1890
+ Optional Argument.
1891
+ Specifies the type of plot to be displayed.
1892
+ Default Value: "target"
1893
+ Permitted Values:
1894
+ * "heatmap": Displays a heatmap of feature correlations.
1895
+ * "pair": Displays a pair plot of features.
1896
+ * "density": Displays a density plot of features.
1897
+ * "count": Displays a count plot of categorical features.
1898
+ * "box": Displays a box plot of numerical features.
1899
+ * "target": Displays the distribution of the target variable.
1900
+ * "all": Displays all the plots.
1901
+ Types: str, list of str
1902
+
1903
+ length:
1904
+ Optional Argument.
1905
+ Specifies the length of the plot.
1906
+ Default Value: 10
1907
+ Types: int
1908
+
1909
+ breadth:
1910
+ Optional Argument.
1911
+ Specifies the breadth of the plot.
1912
+ Default Value: 8
1913
+ Types: int
1914
+
1915
+ columns:
1916
+ Optional Argument.
1917
+ Specifies the column names to be used for plotting.
1918
+ Types: str or list of string
1919
+
1920
+ max_features:
1921
+ Optional Argument.
1922
+ Specifies the maximum number of features to be used for plotting.
1923
+ Default Value: 10
1924
+ Note:
1925
+ * It applies separately to categorical and numerical features.
1926
+ Types: int
1927
+
1928
+ problem_type:
1929
+ Optional Argument.
1930
+ Specifies the type of problem.
1931
+ Permitted Values:
1932
+ * 'regression'
1933
+ * 'classification'
1934
+ Types: str
1935
+
1936
+ RETURNS:
1937
+ None
1938
+
1939
+ RAISES:
1940
+ TeradataMlException.
1941
+
1942
+ EXAMPLES:
1943
+ # Import either of AutoML or AutoClassifier or AutoRegressor or Autodataprep
1944
+ # from teradataml.
1945
+ >>> from teradataml import AutoML
1946
+ >>> from teradataml import DataFrame
1947
+ >>> load_example_data("teradataml", "titanic")
1948
+ >>> titanic_data = DataFrame("titanic")
1949
+ # Example 1: Visualize the data using AutoML class.
1950
+ >>> AutoML.visualize(data = titanic_data,
1951
+ ... target_column = 'survived',
1952
+ ... plot_type = ['heatmap', 'pair', 'histogram', 'target'],
1953
+ ... length = 10,
1954
+ ... breadth = 8,
1955
+ ... max_features = 10,
1956
+ ... problem_type = 'classification')
1957
+
1958
+ # Example 2: Visualize the data using AutoDataPrep class.
1959
+ >>> from teradataml import AutoDataPrep
1960
+ >>> obj = AutoDataPrep(task_type="classification")
1961
+ >>> obj.fit(data = titanic_data, target_column = 'survived')
1962
+
1963
+ # Retrieve the data from AutoDataPrep object.
1964
+ >>> datas = obj.get_data()
1965
+
1966
+ >>> AutoDataPrep.visualize(data = datas['lasso_train'],
1967
+ ... target_column = 'survived',
1968
+ ... plot_type = 'all'
1969
+ ... length = 20,
1970
+ ... breadth = 15)
1971
+ """
1972
+ _FeatureExplore._visualize(**kwargs)
1973
+
1796
1974
  @staticmethod
1797
1975
  def generate_custom_config(file_name = "custom"):
1798
1976
  """
@@ -1877,7 +2055,7 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1877
2055
 
1878
2056
 
1879
2057
  def _regression(self,
1880
- model_list = None,
2058
+ model_list=None,
1881
2059
  auto = False,
1882
2060
  verbose = 0,
1883
2061
  max_runtime_secs = None,
@@ -1945,16 +2123,23 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1945
2123
  Default Value: False
1946
2124
  Types: bool
1947
2125
 
2126
+ seed:
2127
+ Optional Argument.
2128
+ Specifies the random seed for reproducibility.
2129
+ Default Value: 42
2130
+ Types: int
2131
+
1948
2132
  RETURNS:
1949
2133
  a tuple containing, model information and leaderboard.
1950
2134
  """
2135
+
1951
2136
  # Feature Exploration Phase
1952
2137
  _FeatureExplore.__init__(self,
1953
2138
  data = self.data,
1954
2139
  target_column = self.target_column,
1955
2140
  verbose=verbose)
1956
2141
  if verbose > 0:
1957
- self._exploration()
2142
+ self._exploration(**kwargs)
1958
2143
  # Feature Engineering Phase
1959
2144
  _FeatureEngineering.__init__(self,
1960
2145
  data = self.data,
@@ -1965,7 +2150,8 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1965
2150
  **kwargs)
1966
2151
  # Start time
1967
2152
  start_time = time.time()
1968
- data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
2153
+ data, excluded_columns, target_label,\
2154
+ data_transformation_params, data_mapping = self.feature_engineering(auto)
1969
2155
 
1970
2156
  # Data preparation Phase
1971
2157
  _DataPreparation.__init__(self,
@@ -1975,8 +2161,18 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1975
2161
  excluded_columns = excluded_columns,
1976
2162
  custom_data = self.custom_data,
1977
2163
  data_transform_dict = data_transformation_params,
2164
+ data_mapping = data_mapping,
1978
2165
  **kwargs)
1979
- features, data_transformation_params = self.data_preparation(auto)
2166
+ features, data_transformation_params,\
2167
+ data_mapping = self.data_preparation(auto)
2168
+
2169
+ if kwargs.get('auto_dataprep', False):
2170
+ models_info = None
2171
+ leaderboard = None
2172
+ target_count = None
2173
+ return (models_info, leaderboard,
2174
+ target_count, target_label,
2175
+ data_transformation_params, data_mapping)
1980
2176
 
1981
2177
  # Calculating max_runtime_secs for model training by,
1982
2178
  # subtracting the time taken for feature engineering and data preparation
@@ -1998,12 +2194,14 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1998
2194
  custom_data = self.custom_data,
1999
2195
  **kwargs)
2000
2196
  models_info, leaderboard, target_count = self.model_training(auto = auto,
2001
- max_runtime_secs = max_runtime_secs,
2002
- stopping_metric = stopping_metric,
2003
- stopping_tolerance = stopping_tolerance,
2004
- max_models = max_models)
2197
+ max_runtime_secs = max_runtime_secs,
2198
+ stopping_metric = stopping_metric,
2199
+ stopping_tolerance = stopping_tolerance,
2200
+ max_models = max_models)
2005
2201
 
2006
- return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
2202
+ return (models_info, leaderboard,
2203
+ target_count, target_label,
2204
+ data_transformation_params, data_mapping)
2007
2205
 
2008
2206
  class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):
2009
2207
 
@@ -2036,7 +2234,7 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2036
2234
  self.custom_data = custom_data
2037
2235
 
2038
2236
  def _classification(self,
2039
- model_list = None,
2237
+ model_list=None,
2040
2238
  auto = False,
2041
2239
  verbose = 0,
2042
2240
  max_runtime_secs = None,
@@ -2103,18 +2301,26 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2103
2301
  session.
2104
2302
  Default Value: False
2105
2303
  Types: bool
2304
+
2305
+ seed:
2306
+ Optional Argument.
2307
+ Specifies the random seed for reproducibility.
2308
+ Default Value: 42
2309
+ Types: int
2106
2310
 
2107
2311
  RETURNS:
2108
2312
  A tuple containing model information, leaderboard, target count, target label, data transformation parameters and data mapping.
2109
2313
  """
2314
+
2110
2315
 
2111
2316
  # Feature Exploration Phase
2112
2317
  _FeatureExplore.__init__(self,
2113
- data = self.data,
2114
- target_column = self.target_column,
2115
- verbose=verbose)
2318
+ data = self.data,
2319
+ target_column = self.target_column,
2320
+ verbose=verbose,
2321
+ task_type = "classification")
2116
2322
  if verbose > 0:
2117
- self._exploration()
2323
+ self._exploration(**kwargs)
2118
2324
  # Feature Engineering Phase
2119
2325
  _FeatureEngineering.__init__(self,
2120
2326
  data = self.data,
@@ -2126,7 +2332,9 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2126
2332
  **kwargs)
2127
2333
  # Start time
2128
2334
  start_time = time.time()
2129
- data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
2335
+ data, excluded_columns, target_label,\
2336
+ data_transformation_params, data_mapping = self.feature_engineering(auto)
2337
+
2130
2338
  # Data Preparation Phase
2131
2339
  _DataPreparation.__init__(self,
2132
2340
  data = self.data,
@@ -2136,8 +2344,19 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2136
2344
  custom_data = self.custom_data,
2137
2345
  data_transform_dict = data_transformation_params,
2138
2346
  task_type = "Classification",
2347
+ data_mapping = data_mapping,
2139
2348
  **kwargs)
2140
- features, data_transformation_params = self.data_preparation(auto)
2349
+
2350
+ features, data_transformation_params, \
2351
+ data_mapping = self.data_preparation(auto)
2352
+
2353
+ if kwargs.get('auto_dataprep', False):
2354
+ models_info = None
2355
+ leaderboard = None
2356
+ target_count = None
2357
+ return (models_info, leaderboard,
2358
+ target_count, target_label,
2359
+ data_transformation_params, data_mapping)
2141
2360
 
2142
2361
  # Calculating max_runtime_secs for model training by,
2143
2362
  # subtracting the time taken for feature engineering and data preparation
@@ -2159,28 +2378,14 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2159
2378
  custom_data = self.custom_data,
2160
2379
  **kwargs)
2161
2380
  models_info, leaderboard, target_count = self.model_training(auto = auto,
2162
- max_runtime_secs = max_runtime_secs,
2163
- stopping_metric = stopping_metric,
2164
- stopping_tolerance = stopping_tolerance,
2165
- max_models = max_models)
2381
+ max_runtime_secs = max_runtime_secs,
2382
+ stopping_metric = stopping_metric,
2383
+ stopping_tolerance = stopping_tolerance,
2384
+ max_models = max_models)
2166
2385
 
2167
- return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
2168
-
2169
- def _target_column_details(self):
2170
- """
2171
- DESCRIPTION:
2172
- Internal function displays the target column distribution of Target column/ Response column.
2173
- """
2174
- # If data visualization libraries are available
2175
- if self._check_visualization_libraries() and not _is_terminal():
2176
- import matplotlib.pyplot as plt
2177
- import seaborn as sns
2178
- self._display_msg(msg='\nTarget Column Distribution:',
2179
- show_data=True)
2180
- plt.figure(figsize=(6, 6))
2181
- # Ploting a histogram for target column
2182
- sns.countplot(data=self.data.select([self.target_column]).to_pandas(), x=self.target_column)
2183
- plt.show()
2386
+ return (models_info, leaderboard,
2387
+ target_count, target_label,
2388
+ data_transformation_params, data_mapping)
2184
2389
 
2185
2390
  def _check_data_imbalance(self,
2186
2391
  data=None):
@@ -2324,6 +2529,9 @@ class AutoRegressor(AutoML):
2324
2529
  """
2325
2530
  DESCRIPTION:
2326
2531
  AutoRegressor is a special purpose AutoML feature to run regression specific tasks.
2532
+ Note:
2533
+ * When configure.temp_object_type="VT", execution is sequential.
2534
+
2327
2535
 
2328
2536
  PARAMETERS:
2329
2537
  include:
@@ -2405,8 +2613,17 @@ class AutoRegressor(AutoML):
2405
2613
  results are persisted in a table; otherwise,
2406
2614
  results are garbage collected at the end of the
2407
2615
  session.
2616
+ Note:
2617
+ * The user is responsible for cleaning up the persisted tables. The list of persisted tables
2618
+ in the current session can be viewed using the get_persisted_tables() method.
2408
2619
  Default Value: False
2409
2620
  Types: bool
2621
+
2622
+ seed:
2623
+ Optional Argument.
2624
+ Specifies the random seed for reproducibility.
2625
+ Default Value: 42
2626
+ Types: int
2410
2627
 
2411
2628
  RETURNS:
2412
2629
  Instance of AutoRegressor.
@@ -2555,6 +2772,9 @@ class AutoClassifier(AutoML):
2555
2772
  """
2556
2773
  DESCRIPTION:
2557
2774
  AutoClassifier is a special purpose AutoML feature to run classification specific tasks.
2775
+ Note:
2776
+ * When configure.temp_object_type="VT", execution is sequential.
2777
+
2558
2778
 
2559
2779
  PARAMETERS:
2560
2780
  include:
@@ -2636,8 +2856,17 @@ class AutoClassifier(AutoML):
2636
2856
  results are persisted in a table; otherwise,
2637
2857
  results are garbage collected at the end of the
2638
2858
  session.
2859
+ Note:
2860
+ * The user is responsible for cleaning up the persisted tables. The list of persisted tables
2861
+ in the current session can be viewed using the get_persisted_tables() method.
2639
2862
  Default Value: False
2640
2863
  Types: bool
2864
+
2865
+ seed:
2866
+ Optional Argument.
2867
+ Specifies the random seed for reproducibility.
2868
+ Default Value: 42
2869
+ Types: int
2641
2870
 
2642
2871
  RETURNS:
2643
2872
  Instance of AutoClassifier.
@@ -2859,4 +3088,4 @@ class AutoClassifier(AutoML):
2859
3088
  stopping_tolerance=self.stopping_tolerance,
2860
3089
  max_models=self.max_models,
2861
3090
  custom_config_file=self.custom_config_file,
2862
- **kwargs)
3091
+ **kwargs)