teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (107) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +86 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +7 -12
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +15 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +2 -2
  17. teradataml/common/constants.py +61 -26
  18. teradataml/common/messagecodes.py +2 -1
  19. teradataml/common/messages.py +5 -4
  20. teradataml/common/utils.py +255 -37
  21. teradataml/context/context.py +225 -87
  22. teradataml/data/apriori_example.json +22 -0
  23. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  24. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  25. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  26. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  27. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  29. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  30. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  31. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  32. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  34. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  35. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  37. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  38. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  39. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  40. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  41. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  42. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  43. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  47. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  48. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  49. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  51. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  53. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  54. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  55. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  56. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  57. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  58. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  59. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  60. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  61. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  62. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  63. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  65. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  66. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  67. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  68. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  69. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  70. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  71. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  72. teradataml/data/ner_dict.csv +8 -0
  73. teradataml/data/ner_input_eng.csv +7 -0
  74. teradataml/data/ner_rule.csv +5 -0
  75. teradataml/data/pos_input.csv +40 -0
  76. teradataml/data/tdnerextractor_example.json +14 -0
  77. teradataml/data/teradataml_example.json +13 -0
  78. teradataml/data/textmorph_example.json +5 -0
  79. teradataml/data/to_num_data.csv +4 -0
  80. teradataml/data/tochar_data.csv +5 -0
  81. teradataml/data/trans_dense.csv +16 -0
  82. teradataml/data/trans_sparse.csv +55 -0
  83. teradataml/dataframe/copy_to.py +37 -26
  84. teradataml/dataframe/data_transfer.py +61 -45
  85. teradataml/dataframe/dataframe.py +130 -50
  86. teradataml/dataframe/dataframe_utils.py +15 -2
  87. teradataml/dataframe/functions.py +109 -9
  88. teradataml/dataframe/sql.py +328 -76
  89. teradataml/dbutils/dbutils.py +33 -13
  90. teradataml/dbutils/filemgr.py +14 -10
  91. teradataml/lib/aed_0_1.dll +0 -0
  92. teradataml/opensource/_base.py +6 -157
  93. teradataml/options/configure.py +4 -5
  94. teradataml/scriptmgmt/UserEnv.py +305 -38
  95. teradataml/scriptmgmt/lls_utils.py +376 -130
  96. teradataml/store/__init__.py +1 -1
  97. teradataml/table_operators/Apply.py +16 -1
  98. teradataml/table_operators/Script.py +20 -1
  99. teradataml/table_operators/table_operator_util.py +58 -9
  100. teradataml/utils/dtypes.py +2 -1
  101. teradataml/utils/internal_buffer.py +22 -2
  102. teradataml/utils/validators.py +313 -57
  103. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +89 -14
  104. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +107 -77
  105. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  106. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  107. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -188,6 +188,9 @@ class AutoML:
188
188
  results are persisted in a table; otherwise,
189
189
  results are garbage collected at the end of the
190
190
  session.
191
+ Note:
192
+ * User is responsible for cleanup of the persisted tables. List of persisted tables
193
+ in current session can be viewed using get_persisted_tables() method.
191
194
  Default Value: False
192
195
  Types: bool
193
196
 
@@ -476,8 +479,13 @@ class AutoML:
476
479
  self._is_fit_called = False
477
480
  self._is_load_model_called = False
478
481
  self.kwargs = kwargs
479
- self.table_name_mapping={}
480
-
482
+ self.table_name_mapping = {}
483
+ # Stores the table names of all intermediate data
484
+ self._intermediate_table_names={}
485
+ self._auto_dataprep = False
486
+ self._phases = None
487
+ self._progressbar_prefix = "AutoML Running:"
488
+
481
489
  @collect_queryband(queryband="AutoML_fit")
482
490
  def fit(self,
483
491
  data,
@@ -602,15 +610,25 @@ class AutoML:
602
610
  clf = task_cls(self.data, self.target_column, self.custom_data)
603
611
 
604
612
  self.model_info, self.leader_board, self.target_count, self.target_label, \
605
- self.data_transformation_params, self.table_name_mapping = getattr(clf, cls_method)(
606
- model_list = self.model_list,
607
- auto = self.auto,
608
- verbose = self.verbose,
609
- max_runtime_secs = self.max_runtime_secs,
610
- stopping_metric = self.stopping_metric,
611
- stopping_tolerance = self.stopping_tolerance,
612
- max_models = self.max_models,
613
- **self.kwargs)
613
+ self.data_transformation_params, self._intermediate_table_names = getattr(clf, cls_method)(
614
+ model_list = self.model_list,
615
+ auto = self.auto,
616
+ verbose = self.verbose,
617
+ max_runtime_secs = self.max_runtime_secs,
618
+ stopping_metric = self.stopping_metric,
619
+ stopping_tolerance = self.stopping_tolerance,
620
+ max_models = self.max_models,
621
+ auto_dataprep = self._auto_dataprep,
622
+ automl_phases = self._phases,
623
+ progress_prefix = self._progressbar_prefix,
624
+ **self.kwargs)
625
+
626
+
627
+ # table_name_mapping stores the table names of all intermediate data (lasso, rfe, pca)
628
+ # used for training models
629
+ keys_to_extract = ['lasso_train', 'rfe_train', 'pca_train']
630
+ self.table_name_mapping = {key: self._intermediate_table_names[key] for key in keys_to_extract
631
+ if key in self._intermediate_table_names}
614
632
 
615
633
  # Model Evaluation Phase
616
634
  self.m_evaluator = _ModelEvaluator(self.model_info,
@@ -680,13 +698,9 @@ class AutoML:
680
698
  >>> prediction = automl_obj.predict(admissions_test, rank=3, use_loaded_models=True)
681
699
  >>> prediction
682
700
  """
683
- # Checking if fit or load model is called before predict, If not raise error
684
- if not self._is_fit_called and not self._is_load_model_called:
685
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
686
- "'predict' method", \
687
- "'fit' or 'load' method must be called before" \
688
- " running predict.")
689
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
701
+ # Raise error if fit is not called before predict
702
+ _Validators._validate_dependent_method("predict", ["fit", "load"],
703
+ [self._is_fit_called, self._is_load_model_called])
690
704
 
691
705
  # Appending predict arguments to list for validation.
692
706
  arg_info_pred_matrix = []
@@ -862,13 +876,10 @@ class AutoML:
862
876
  >>> evaluation = automl_obj.evaluate(admissions_test, rank=3, use_loaded_models=True)
863
877
  >>> evaluation
864
878
  """
865
- if not self._is_fit_called and not self._is_load_model_called:
866
- # raise ValueError("fit() method must be called before evaluating.")
867
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
868
- "'evaluate' method", \
869
- "'fit' or 'load' method must be called before" \
870
- " running evaluate.")
871
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
879
+ # Raising exception if fit or load model is not called before evaluate
880
+ _Validators._validate_dependent_method("evaluate", ["fit", "load"],
881
+ [self._is_fit_called, self._is_load_model_called])
882
+
872
883
  # Appending evaluate arguments to list for validation.
873
884
  arg_info_pred_matrix = []
874
885
  arg_info_pred_matrix.append(["data", data, False, (DataFrame), True])
@@ -1017,13 +1028,9 @@ class AutoML:
1017
1028
  # Generate leaderboard using leaderboard() method on "automl_obj".
1018
1029
  >>> automl_obj.leaderboard()
1019
1030
  """
1020
- if not self._is_fit_called:
1021
- # raise ValueError("fit() method must be called before generating leaderboard.")
1022
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1023
- "'leaderboard' method", \
1024
- "'fit' method must be called before" \
1025
- " generating leaderboard.")
1026
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1031
+ # Raise error if fit is not called before leaderboard
1032
+ _Validators._validate_dependent_method("leaderboard", "fit", self._is_fit_called)
1033
+
1027
1034
  return self.leader_board
1028
1035
 
1029
1036
  @collect_queryband(queryband="AutoML_leader")
@@ -1046,13 +1053,9 @@ class AutoML:
1046
1053
  # Display best performing model using leader() method on "automl_obj".
1047
1054
  >>> automl_obj.leader()
1048
1055
  """
1049
- if not self._is_fit_called:
1050
- # raise ValueError("fit() method must be called before generating leader.")
1051
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1052
- "'leader' method", \
1053
- "'fit' method must be called before" \
1054
- " generating leader.")
1055
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1056
+ # Raise error if fit is not called before leader
1057
+ _Validators._validate_dependent_method("leader", "fit", self._is_fit_called)
1058
+
1056
1059
  record = self.leader_board
1057
1060
  if not _is_terminal():
1058
1061
  display(record[record['RANK'] == 1])
@@ -1125,13 +1128,9 @@ class AutoML:
1125
1128
  >>> automl_obj.model_hyperparameters(rank=1)
1126
1129
  """
1127
1130
 
1128
- if not self._is_fit_called and not self._is_load_model_called:
1129
- # raise ValueError("fit() or load() method must be called before getting hyperparameters.")
1130
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1131
- "'model_hyperparameters' method",
1132
- "No models available to get hyperparameters. " \
1133
- "Run 'fit()' or 'load()' methods to get models.")
1134
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1131
+ # Raise error if fit or load model is not called before model_hyperparameters
1132
+ _Validators._validate_dependent_method("model_hyperparameters", ["fit", "load"],
1133
+ [self._is_fit_called, self._is_load_model_called])
1135
1134
 
1136
1135
  arg_info_matrix = []
1137
1136
  arg_info_matrix.append(["rank", rank, True, (int), True])
@@ -1270,28 +1269,18 @@ class AutoML:
1270
1269
  start_rank, end_rank = ranks.start, ranks.stop
1271
1270
 
1272
1271
  # Check if both parts are non-negative integers
1273
- if not (start_rank > 0 and end_rank > 0):
1274
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1275
- "'deploy' method", \
1276
- "Provided start and end rank in 'ranks' "\
1277
- "must be positive non-zero integers.")
1278
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1272
+ _Validators._validate_positive_int(start_rank, "ranks(start)")
1273
+ _Validators._validate_positive_int(end_rank, "ranks(end)")
1279
1274
 
1280
1275
  # Check if start_rank is less than or equal to end_rank
1281
1276
  if start_rank > end_rank:
1282
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1283
- "'deploy' method", \
1284
- "Provided start rank in 'ranks' must be less than"\
1285
- " or equal to end rank in 'ranks'.")
1286
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1277
+ err = "Provided start rank in 'ranks' must be less than or equal to end rank in 'ranks'."
1278
+ self._raise_error("deploy", err)
1287
1279
 
1288
1280
  # check end rank is less than or equal to total models
1289
1281
  if end_rank > self.leader_board.RANK.max():
1290
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1291
- "'deploy' method", \
1292
- "Provided end rank in 'ranks' must be less than"\
1293
- " or equal to total models available.")
1294
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1282
+ err = "Provided end rank in 'ranks' must be less than or equal to total models available."
1283
+ self._raise_error("deploy", err)
1295
1284
 
1296
1285
  return start_rank, end_rank
1297
1286
 
@@ -1356,12 +1345,7 @@ class AutoML:
1356
1345
  >>> obj.deploy("model_table", ranks=range(2,6))
1357
1346
  """
1358
1347
  # raise Error if fit is not called
1359
- if not self._is_fit_called:
1360
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1361
- "'deploy' method", \
1362
- "'fit' method must be called before" \
1363
- " 'deploy'.")
1364
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1348
+ _Validators._validate_dependent_method("deploy", "fit", self._is_fit_called)
1365
1349
 
1366
1350
  # Appending arguments to list for validation
1367
1351
  arg_info_matrix = []
@@ -1808,6 +1792,185 @@ class AutoML:
1808
1792
 
1809
1793
  db_drop_table(table_name)
1810
1794
 
1795
+ @collect_queryband(queryband="AutoML_get_persisted_tables")
1796
+ def get_persisted_tables(self):
1797
+ """
1798
+ DESCRIPTION:
1799
+ Get the list of the tables that are persisted in the database.
1800
+ Note:
1801
+ * User is responsible for keeping track of the persistent tables
1802
+ and cleanup of the same if required.
1803
+
1804
+ PARAMETERS:
1805
+ None
1806
+
1807
+ RETURNS:
1808
+ Dictionary, containing the list of table names that mapped to the stage
1809
+ at which it was generated.
1810
+
1811
+ RAISES:
1812
+ TeradataMlException.
1813
+
1814
+ EXAMPLES:
1815
+ # Create an instance of the AutoML called "obj"
1816
+ # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
1817
+ # 'persist' argument must be set to True in the AutoML object.
1818
+ >>> obj = AutoML(verbose=2, max_models=10, persist=True)
1819
+
1820
+ # Load and fit the data.
1821
+ >>> load_example_data("teradataml", "titanic")
1822
+ >>> titanic_data = DataFrame("titanic")
1823
+ >>> obj.fit(data = titanic_data, target_column = 'survived')
1824
+
1825
+ # Get the list of tables that are persisted in the database.
1826
+ >>> obj.get_persisted_tables()
1827
+ """
1828
+ # Check if fit is called
1829
+ _Validators._validate_dependent_method("get_persisted_tables", "fit", self._is_fit_called)
1830
+
1831
+ # check if persist is passed as argument and is set to True
1832
+ persist_val = True if self.kwargs.get('persist', False) else None
1833
+
1834
+ _Validators._validate_dependent_argument("get_persisted_tables", True,
1835
+ "persist", persist_val,
1836
+ msg_arg_value='True')
1837
+
1838
+ # result table names
1839
+ return self._intermediate_table_names
1840
+
1841
+ def _raise_error(self, method_name, error_msg):
1842
+ """
1843
+ DESCRIPTION:
1844
+ Internal Function raises an error message when a method
1845
+ fails to execute.
1846
+
1847
+ PARAMETERS:
1848
+ method_name:
1849
+ Required Argument.
1850
+ Specifies the method name that failed to execute.
1851
+ Types: str
1852
+
1853
+ error_msg:
1854
+ Required Argument.
1855
+ Specifies the error message to be displayed.
1856
+ Types: str
1857
+
1858
+ RAISES:
1859
+ TeradataMlException.
1860
+
1861
+ EXAMPLES:
1862
+ >>> self._raise_error("fit", "fit() method must be called before 'deploy'.")
1863
+ """
1864
+ err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
1865
+ f'{method_name} method',
1866
+ error_msg)
1867
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1868
+
1869
+ @staticmethod
1870
+ def visualize(**kwargs):
1871
+ """
1872
+ DESCRIPTION:
1873
+ Function visualizes the data using various plots such as heatmap,
1874
+ pair plot, histogram, univariate plot, count plot, box plot, and target distribution.
1875
+
1876
+ PARAMETERS:
1877
+ data:
1878
+ Required Argument.
1879
+ Specifies the input teradataml DataFrame for plotting.
1880
+ Types: teradataml Dataframe
1881
+
1882
+ target_column:
1883
+ Required Argument.
1884
+ Specifies the name of the target column in "data".
1885
+ Note:
1886
+ * "target_column" must be of numeric type.
1887
+ Types: str
1888
+
1889
+ plot_type:
1890
+ Optional Argument.
1891
+ Specifies the type of plot to be displayed.
1892
+ Default Value: "target"
1893
+ Permitted Values:
1894
+ * "heatmap": Displays a heatmap of feature correlations.
1895
+ * "pair": Displays a pair plot of features.
1896
+ * "density": Displays a density plot of features.
1897
+ * "count": Displays a count plot of categorical features.
1898
+ * "box": Displays a box plot of numerical features.
1899
+ * "target": Displays the distribution of the target variable.
1900
+ * "all": Displays all the plots.
1901
+ Types: str, list of str
1902
+
1903
+ length:
1904
+ Optional Argument.
1905
+ Specifies the length of the plot.
1906
+ Default Value: 10
1907
+ Types: int
1908
+
1909
+ breadth:
1910
+ Optional Argument.
1911
+ Specifies the breadth of the plot.
1912
+ Default Value: 8
1913
+ Types: int
1914
+
1915
+ columns:
1916
+ Optional Argument.
1917
+ Specifies the column names to be used for plotting.
1918
+ Types: str or list of string
1919
+
1920
+ max_features:
1921
+ Optional Argument.
1922
+ Specifies the maximum number of features to be used for plotting.
1923
+ Default Value: 10
1924
+ Note:
1925
+ * It applies separately to categorical and numerical features.
1926
+ Types: int
1927
+
1928
+ problem_type:
1929
+ Optional Argument.
1930
+ Specifies the type of problem.
1931
+ Permitted Values:
1932
+ * 'regression'
1933
+ * 'classification'
1934
+ Types: str
1935
+
1936
+ RETURNS:
1937
+ None
1938
+
1939
+ RAISES:
1940
+ TeradataMlException.
1941
+
1942
+ EXAMPLES:
1943
+ # Import either of AutoML or AutoClassifier or AutoRegressor or AutoDataPrep
1944
+ # from teradataml.
1945
+ >>> from teradataml import AutoML
1946
+ >>> from teradataml import DataFrame
1947
+ >>> load_example_data("teradataml", "titanic")
1948
+ >>> titanic_data = DataFrame("titanic")
1949
+ # Example 1: Visualize the data using AutoML class.
1950
+ >>> AutoML.visualize(data = titanic_data,
1951
+ ... target_column = 'survived',
1952
+ ... plot_type = ['heatmap', 'pair', 'histogram', 'target'],
1953
+ ... length = 10,
1954
+ ... breadth = 8,
1955
+ ... max_features = 10,
1956
+ ... problem_type = 'classification')
1957
+
1958
+ # Example 2: Visualize the data using AutoDataPrep class.
1959
+ >>> from teradataml import AutoDataPrep
1960
+ >>> obj = AutoDataPrep(task_type="classification")
1961
+ >>> obj.fit(data = titanic_data, target_column = 'survived')
1962
+
1963
+ # Retrieve the data from AutoDataPrep object.
1964
+ >>> datas = obj.get_data()
1965
+
1966
+ >>> AutoDataPrep.visualize(data = datas['lasso_train'],
1967
+ ... target_column = 'survived',
1968
+ ... plot_type = 'all',
1969
+ ... length = 20,
1970
+ ... breadth = 15)
1971
+ """
1972
+ _FeatureExplore._visualize(**kwargs)
1973
+
1811
1974
  @staticmethod
1812
1975
  def generate_custom_config(file_name = "custom"):
1813
1976
  """
@@ -1892,7 +2055,7 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1892
2055
 
1893
2056
 
1894
2057
  def _regression(self,
1895
- model_list = None,
2058
+ model_list=None,
1896
2059
  auto = False,
1897
2060
  verbose = 0,
1898
2061
  max_runtime_secs = None,
@@ -1969,13 +2132,14 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1969
2132
  RETURNS:
1970
2133
  a tuple containing, model information and leaderboard.
1971
2134
  """
2135
+
1972
2136
  # Feature Exploration Phase
1973
2137
  _FeatureExplore.__init__(self,
1974
2138
  data = self.data,
1975
2139
  target_column = self.target_column,
1976
2140
  verbose=verbose)
1977
2141
  if verbose > 0:
1978
- self._exploration()
2142
+ self._exploration(**kwargs)
1979
2143
  # Feature Engineering Phase
1980
2144
  _FeatureEngineering.__init__(self,
1981
2145
  data = self.data,
@@ -1986,7 +2150,8 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1986
2150
  **kwargs)
1987
2151
  # Start time
1988
2152
  start_time = time.time()
1989
- data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
2153
+ data, excluded_columns, target_label,\
2154
+ data_transformation_params, data_mapping = self.feature_engineering(auto)
1990
2155
 
1991
2156
  # Data preparation Phase
1992
2157
  _DataPreparation.__init__(self,
@@ -1996,8 +2161,18 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
1996
2161
  excluded_columns = excluded_columns,
1997
2162
  custom_data = self.custom_data,
1998
2163
  data_transform_dict = data_transformation_params,
2164
+ data_mapping = data_mapping,
1999
2165
  **kwargs)
2000
- features, data_transformation_params = self.data_preparation(auto)
2166
+ features, data_transformation_params,\
2167
+ data_mapping = self.data_preparation(auto)
2168
+
2169
+ if kwargs.get('auto_dataprep', False):
2170
+ models_info = None
2171
+ leaderboard = None
2172
+ target_count = None
2173
+ return (models_info, leaderboard,
2174
+ target_count, target_label,
2175
+ data_transformation_params, data_mapping)
2001
2176
 
2002
2177
  # Calculating max_runtime_secs for model training by,
2003
2178
  # subtracting the time taken for feature engineering and data preparation
@@ -2019,12 +2194,14 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
2019
2194
  custom_data = self.custom_data,
2020
2195
  **kwargs)
2021
2196
  models_info, leaderboard, target_count = self.model_training(auto = auto,
2022
- max_runtime_secs = max_runtime_secs,
2023
- stopping_metric = stopping_metric,
2024
- stopping_tolerance = stopping_tolerance,
2025
- max_models = max_models)
2197
+ max_runtime_secs = max_runtime_secs,
2198
+ stopping_metric = stopping_metric,
2199
+ stopping_tolerance = stopping_tolerance,
2200
+ max_models = max_models)
2026
2201
 
2027
- return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
2202
+ return (models_info, leaderboard,
2203
+ target_count, target_label,
2204
+ data_transformation_params, data_mapping)
2028
2205
 
2029
2206
  class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):
2030
2207
 
@@ -2057,7 +2234,7 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2057
2234
  self.custom_data = custom_data
2058
2235
 
2059
2236
  def _classification(self,
2060
- model_list = None,
2237
+ model_list=None,
2061
2238
  auto = False,
2062
2239
  verbose = 0,
2063
2240
  max_runtime_secs = None,
@@ -2134,14 +2311,16 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2134
2311
  RETURNS:
2135
2312
  a tuple containing, model information and leaderboard.
2136
2313
  """
2314
+
2137
2315
 
2138
2316
  # Feature Exploration Phase
2139
2317
  _FeatureExplore.__init__(self,
2140
- data = self.data,
2141
- target_column = self.target_column,
2142
- verbose=verbose)
2318
+ data = self.data,
2319
+ target_column = self.target_column,
2320
+ verbose=verbose,
2321
+ task_type = "classification")
2143
2322
  if verbose > 0:
2144
- self._exploration()
2323
+ self._exploration(**kwargs)
2145
2324
  # Feature Engineering Phase
2146
2325
  _FeatureEngineering.__init__(self,
2147
2326
  data = self.data,
@@ -2153,7 +2332,9 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2153
2332
  **kwargs)
2154
2333
  # Start time
2155
2334
  start_time = time.time()
2156
- data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
2335
+ data, excluded_columns, target_label,\
2336
+ data_transformation_params, data_mapping = self.feature_engineering(auto)
2337
+
2157
2338
  # Data Preparation Phase
2158
2339
  _DataPreparation.__init__(self,
2159
2340
  data = self.data,
@@ -2163,8 +2344,19 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2163
2344
  custom_data = self.custom_data,
2164
2345
  data_transform_dict = data_transformation_params,
2165
2346
  task_type = "Classification",
2347
+ data_mapping = data_mapping,
2166
2348
  **kwargs)
2167
- features, data_transformation_params = self.data_preparation(auto)
2349
+
2350
+ features, data_transformation_params, \
2351
+ data_mapping = self.data_preparation(auto)
2352
+
2353
+ if kwargs.get('auto_dataprep', False):
2354
+ models_info = None
2355
+ leaderboard = None
2356
+ target_count = None
2357
+ return (models_info, leaderboard,
2358
+ target_count, target_label,
2359
+ data_transformation_params, data_mapping)
2168
2360
 
2169
2361
  # Calculating max_runtime_secs for model training by,
2170
2362
  # subtracting the time taken for feature engineering and data preparation
@@ -2186,28 +2378,14 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
2186
2378
  custom_data = self.custom_data,
2187
2379
  **kwargs)
2188
2380
  models_info, leaderboard, target_count = self.model_training(auto = auto,
2189
- max_runtime_secs = max_runtime_secs,
2190
- stopping_metric = stopping_metric,
2191
- stopping_tolerance = stopping_tolerance,
2192
- max_models = max_models)
2381
+ max_runtime_secs = max_runtime_secs,
2382
+ stopping_metric = stopping_metric,
2383
+ stopping_tolerance = stopping_tolerance,
2384
+ max_models = max_models)
2193
2385
 
2194
- return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
2195
-
2196
- def _target_column_details(self):
2197
- """
2198
- DESCRIPTION:
2199
- Internal function displays the target column distribution of Target column/ Response column.
2200
- """
2201
- # If data visualization libraries are available
2202
- if self._check_visualization_libraries() and not _is_terminal():
2203
- import matplotlib.pyplot as plt
2204
- import seaborn as sns
2205
- self._display_msg(msg='\nTarget Column Distribution:',
2206
- show_data=True)
2207
- plt.figure(figsize=(6, 6))
2208
- # Ploting a histogram for target column
2209
- sns.countplot(data=self.data.select([self.target_column]).to_pandas(), x=self.target_column)
2210
- plt.show()
2386
+ return (models_info, leaderboard,
2387
+ target_count, target_label,
2388
+ data_transformation_params, data_mapping)
2211
2389
 
2212
2390
  def _check_data_imbalance(self,
2213
2391
  data=None):
@@ -2435,6 +2613,9 @@ class AutoRegressor(AutoML):
2435
2613
  results are persisted in a table; otherwise,
2436
2614
  results are garbage collected at the end of the
2437
2615
  session.
2616
+ Note:
2617
+ * User is responsible for cleanup of the persisted tables. List of persisted tables
2618
+ in current session can be viewed using get_persisted_tables() method.
2438
2619
  Default Value: False
2439
2620
  Types: bool
2440
2621
 
@@ -2675,6 +2856,9 @@ class AutoClassifier(AutoML):
2675
2856
  results are persisted in a table; otherwise,
2676
2857
  results are garbage collected at the end of the
2677
2858
  session.
2859
+ Note:
2860
+ * User is responsible for cleanup of the persisted tables. List of persisted tables
2861
+ in current session can be viewed using get_persisted_tables() method.
2678
2862
  Default Value: False
2679
2863
  Types: bool
2680
2864
 
@@ -2904,4 +3088,4 @@ class AutoClassifier(AutoML):
2904
3088
  stopping_tolerance=self.stopping_tolerance,
2905
3089
  max_models=self.max_models,
2906
3090
  custom_config_file=self.custom_config_file,
2907
- **kwargs)
3091
+ **kwargs)