teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +86 -13
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +7 -12
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +16 -1
- teradataml/analytics/utils.py +15 -1
- teradataml/automl/__init__.py +290 -106
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +29 -10
- teradataml/automl/data_transformation.py +11 -0
- teradataml/automl/feature_engineering.py +64 -4
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +1 -1
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/constants.py +61 -26
- teradataml/common/messagecodes.py +2 -1
- teradataml/common/messages.py +5 -4
- teradataml/common/utils.py +255 -37
- teradataml/context/context.py +225 -87
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +13 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/dataframe/copy_to.py +37 -26
- teradataml/dataframe/data_transfer.py +61 -45
- teradataml/dataframe/dataframe.py +130 -50
- teradataml/dataframe/dataframe_utils.py +15 -2
- teradataml/dataframe/functions.py +109 -9
- teradataml/dataframe/sql.py +328 -76
- teradataml/dbutils/dbutils.py +33 -13
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/_base.py +6 -157
- teradataml/options/configure.py +4 -5
- teradataml/scriptmgmt/UserEnv.py +305 -38
- teradataml/scriptmgmt/lls_utils.py +376 -130
- teradataml/store/__init__.py +1 -1
- teradataml/table_operators/Apply.py +16 -1
- teradataml/table_operators/Script.py +20 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +2 -1
- teradataml/utils/internal_buffer.py +22 -2
- teradataml/utils/validators.py +313 -57
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +89 -14
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +107 -77
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
teradataml/automl/__init__.py
CHANGED
@@ -188,6 +188,9 @@ class AutoML:
  results are persisted in a table; otherwise,
  results are garbage collected at the end of the
  session.
+ Note:
+     * User is responsible for cleanup of the persisted tables. List of persisted tables
+       in current session can be viewed using get_persisted_tables() method.
  Default Value: False
  Types: bool

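In practice this note ties persist=True to the get_persisted_tables() method introduced later in this diff. A minimal usage sketch, assembled from the docstring examples added below (dataset and argument values are taken from those examples, not verified output):

>>> from teradataml import AutoML, DataFrame, load_example_data
>>> load_example_data("teradataml", "titanic")
>>> titanic_data = DataFrame("titanic")
>>> # persist=True keeps the intermediate result tables instead of garbage collecting them.
>>> obj = AutoML(verbose=2, max_models=10, persist=True)
>>> obj.fit(data=titanic_data, target_column="survived")
>>> # Returns a dictionary mapping each persisted table name to the stage that produced it;
>>> # dropping those tables afterwards is the caller's responsibility.
>>> obj.get_persisted_tables()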
@@ -476,8 +479,13 @@ class AutoML:
  self._is_fit_called = False
  self._is_load_model_called = False
  self.kwargs = kwargs
- self.table_name_mapping={}
-
+ self.table_name_mapping = {}
+ # Stores the table name of all intermediate datas
+ self._intermediate_table_names={}
+ self._auto_dataprep = False
+ self._phases = None
+ self._progressbar_prefix = "AutoML Running:"
+
  @collect_queryband(queryband="AutoML_fit")
  def fit(self,
          data,
@@ -602,15 +610,25 @@ class AutoML:
  clf = task_cls(self.data, self.target_column, self.custom_data)

  self.model_info, self.leader_board, self.target_count, self.target_label, \
- self.data_transformation_params, self.
- …
+ self.data_transformation_params, self._intermediate_table_names = getattr(clf, cls_method)(
+     model_list = self.model_list,
+     auto = self.auto,
+     verbose = self.verbose,
+     max_runtime_secs = self.max_runtime_secs,
+     stopping_metric = self.stopping_metric,
+     stopping_tolerance = self.stopping_tolerance,
+     max_models = self.max_models,
+     auto_dataprep = self._auto_dataprep,
+     automl_phases = self._phases,
+     progress_prefix = self._progressbar_prefix,
+     **self.kwargs)
+
+
+ # table_name_mapping stores the table name of all intermediate datas (lasso, rfe, pca)
+ # used for training models
+ keys_to_extract = ['lasso_train', 'rfe_train', 'pca_train']
+ self.table_name_mapping = {key: self._intermediate_table_names[key] for key in keys_to_extract
+                            if key in self._intermediate_table_names}

  # Model Evaluation Phase
  self.m_evaluator = _ModelEvaluator(self.model_info,
@@ -680,13 +698,9 @@ class AutoML:
  >>> prediction = automl_obj.predict(admissions_test, rank=3, use_loaded_models=True)
  >>> prediction
  """
- #
- …
- "'predict' method", \
- "'fit' or 'load' method must be called before" \
- " running predict.")
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+ # Raise error if fit is not called before predict
+ _Validators._validate_dependent_method("predict", ["fit", "load"],
+                                        [self._is_fit_called, self._is_load_model_called])

  # Appending predict arguments to list for validation.
  arg_info_pred_matrix = []
@@ -862,13 +876,10 @@ class AutoML:
  >>> evaluation = automl_obj.evaluate(admissions_test, rank=3, use_loaded_models=True)
  >>> evaluation
  """
- if
- …
- "'fit' or 'load' method must be called before" \
- " running evaluate.")
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+ # Raising exception if fit or load model is not called before evaluate
+ _Validators._validate_dependent_method("evaluate", ["fit", "load"],
+                                        [self._is_fit_called, self._is_load_model_called])
+
  # Appending evaluate arguments to list for validation.
  arg_info_pred_matrix = []
  arg_info_pred_matrix.append(["data", data, False, (DataFrame), True])
@@ -1017,13 +1028,9 @@ class AutoML:
  # Generate leaderboard using leaderboard() method on "automl_obj".
  >>> automl_obj.leaderboard()
  """
- if not
- …
- "'leaderboard' method", \
- "'fit' method must be called before" \
- " generating leaderboard.")
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+ # Raise error if fit is not called before leaderboard
+ _Validators._validate_dependent_method("leaderboard", "fit", self._is_fit_called)
+
  return self.leader_board

  @collect_queryband(queryband="AutoML_leader")
@@ -1046,13 +1053,9 @@ class AutoML:
  # Display best performing model using leader() method on "automl_obj".
  >>> automl_obj.leader()
  """
- if not
- …
- "'leader' method", \
- "'fit' method must be called before" \
- " generating leader.")
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+ # Raise error if fit is not called before leader
+ _Validators._validate_dependent_method("leader", "fit", self._is_fit_called)
+
  record = self.leader_board
  if not _is_terminal():
      display(record[record['RANK'] == 1])
@@ -1125,13 +1128,9 @@ class AutoML:
  >>> automl_obj.model_hyperparameters(rank=1)
  """

- if
- …
- "'model_hyperparameters' method",
- "No models available to get hyperparameters. " \
- "Run 'fit()' or 'load()' methods to get models.")
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+ # Raise error if fit or load model is not called before model_hyperparameters
+ _Validators._validate_dependent_method("model_hyperparameters", ["fit", "load"],
+                                        [self._is_fit_called, self._is_load_model_called])

  arg_info_matrix = []
  arg_info_matrix.append(["rank", rank, True, (int), True])
@@ -1270,28 +1269,18 @@ class AutoML:
  start_rank, end_rank = ranks.start, ranks.stop

  # Check if both parts are non-negative integers
- …
- "'deploy' method", \
- "Provided start and end rank in 'ranks' "\
- "must be positive non-zero integers.")
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+ _Validators._validate_positive_int(start_rank, "ranks(start)")
+ _Validators._validate_positive_int(end_rank, "ranks(end)")

  # Check if start_rank is less than or equal to end_rank
  if start_rank > end_rank:
-     err =
-     …
-     "Provided start rank in 'ranks' must be less than"\
-     " or equal to end rank in 'ranks'.")
-     raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+     err = "Provided start rank in 'ranks' must be less than or equal to end rank in 'ranks'."
+     self._raise_error("deploy", err)

  # check end rank is less than or equal to total models
  if end_rank > self.leader_board.RANK.max():
-     err =
-     …
-     "Provided end rank in 'ranks' must be less than"\
-     " or equal to total models available.")
-     raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+     err = "Provided end rank in 'ranks' must be less than or equal to total models available."
+     self._raise_error("deploy", err)

  return start_rank, end_rank

@@ -1356,12 +1345,7 @@ class AutoML:
  >>> obj.deploy("model_table", ranks=range(2,6))
  """
  # raise Error if fit is not called
- …
- err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-                            "'deploy' method", \
-                            "'fit' method must be called before" \
-                            " 'deploy'.")
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+ _Validators._validate_dependent_method("deploy", "fit", self._is_fit_called)

  # Appending arguments to list for validation
  arg_info_matrix = []
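The hunks above replace the repeated inline Messages.get_message(...) / TeradataMlException blocks in predict(), evaluate(), leaderboard(), leader(), model_hyperparameters() and deploy() with calls to the shared _Validators._validate_dependent_method helper (teradataml/utils/validators.py is also expanded in this release, +313 -57). A rough sketch of the guard pattern being centralized; the helper body below is an illustrative assumption, not the library's actual implementation:

# Hypothetical stand-in for _Validators._validate_dependent_method, shown only to
# illustrate the dependency-guard pattern used at the call sites above.
def _validate_dependent_method(method_name, dependent_methods, methods_called):
    # Accept either a single prerequisite ("fit") or a list (["fit", "load"]),
    # paired with the corresponding "was it called?" flags.
    if not isinstance(dependent_methods, list):
        dependent_methods = [dependent_methods]
        methods_called = [methods_called]
    # The dependent method may run as soon as any prerequisite has been called.
    if not any(methods_called):
        prerequisites = " or ".join("'{}'".format(name) for name in dependent_methods)
        raise RuntimeError("{} method must be called before running '{}'."
                           .format(prerequisites, method_name))

# Call shape mirroring the new code, e.g. in predict():
# _validate_dependent_method("predict", ["fit", "load"],
#                            [self._is_fit_called, self._is_load_model_called])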
@@ -1808,6 +1792,185 @@ class AutoML:

  db_drop_table(table_name)

+ @collect_queryband(queryband="AutoML_get_persisted_tables")
+ def get_persisted_tables(self):
+     """
+     DESCRIPTION:
+         Get the list of the tables that are persisted in the database.
+         Note:
+             * User is responsible for keeping track of the persistent tables
+               and cleanup of the same if required.
+
+     PARAMETERS:
+         None
+
+     RETURNS:
+         Dictionary, containing the list of table names that mapped to the stage
+         at which it was generated.
+
+     RAISES:
+         TeradataMlException.
+
+     EXAMPLES:
+         # Create an instance of the AutoML called "obj"
+         # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
+         # 'persist' argument must be set to True in the AutoML object.
+         >>> obj = AutoML(verbose=2, max_models=10, persist=True)
+
+         # Load and fit the data.
+         >>> load_example_data("teradataml", "titanic")
+         >>> titanic_data = DataFrame("titanic")
+         >>> obj.fit(data = titanic_data, target_column = titanic.survived)
+
+         # Get the list of tables that are persisted in the database.
+         >>> obj.get_persisted_tables()
+     """
+     # Check if fit is called
+     _Validators._validate_dependent_method("get_persisted_tables", "fit", self._is_fit_called)
+
+     # check if persist is passed as argument and is set to True
+     persist_val = True if self.kwargs.get('persist', False) else None
+
+     _Validators._validate_dependent_argument("get_persisted_tables", True,
+                                              "persist", persist_val,
+                                              msg_arg_value='True')
+
+     # result table names
+     return self._intermediate_table_names
+
+ def _raise_error(self, method_name, error_msg):
+     """
+     DESCRIPTION:
+         Internal Function raises an error message when a method
+         fails to execute.
+
+     PARAMETERS:
+         method_name:
+             Required Argument.
+             Specifies the method name that failed to execute.
+             Types: str
+
+         error_msg:
+             Required Argument.
+             Specifies the error message to be displayed.
+             Types: str
+
+     RAISES:
+         TeradataMlException.
+
+     EXAMPLES:
+         >>> self._raise_error("fit", "fit() method must be called before 'deploy'.")
+     """
+     err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
+                                f'{method_name} method',
+                                error_msg)
+     raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+
+ @staticmethod
+ def visualize(**kwargs):
+     """
+     DESCRIPTION:
+         Function visualizes the data using various plots such as heatmap,
+         pair plot, histogram, univariate plot, count plot, box plot, and target distribution.
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the input teradataml DataFrame for plotting.
+             Types: teradataml Dataframe
+
+         target_column:
+             Required Argument.
+             Specifies the name of the target column in "data".
+             Note:
+                 * "target_column" must be of numeric type.
+             Types: str
+
+         plot_type:
+             Optional Argument.
+             Specifies the type of plot to be displayed.
+             Default Value: "target"
+             Permitted Values:
+                 * "heatmap": Displays a heatmap of feature correlations.
+                 * "pair": Displays a pair plot of features.
+                 * "density": Displays a density plot of features.
+                 * "count": Displays a count plot of categorical features.
+                 * "box": Displays a box plot of numerical features.
+                 * "target": Displays the distribution of the target variable.
+                 * "all": Displays all the plots.
+             Types: str, list of str
+
+         length:
+             Optional Argument.
+             Specifies the length of the plot.
+             Default Value: 10
+             Types: int
+
+         breadth:
+             Optional Argument.
+             Specifies the breadth of the plot.
+             Default Value: 8
+             Types: int
+
+         columns:
+             Optional Argument.
+             Specifies the column names to be used for plotting.
+             Types: str or list of string
+
+         max_features:
+             Optional Argument.
+             Specifies the maximum number of features to be used for plotting.
+             Default Value: 10
+             Note:
+                 * It applies separately to categorical and numerical features.
+             Types: int
+
+         problem_type:
+             Optional Argument.
+             Specifies the type of problem.
+             Permitted Values:
+                 * 'regression'
+                 * 'classification'
+             Types: str
+
+     RETURNS:
+         None
+
+     RAISES:
+         TeradataMlException.
+
+     EXAMPLES:
+         # Import either of AutoML or AutoClassifier or AutoRegressor or Autodataprep
+         # from teradataml.
+         >>> from teradataml import AutoML
+         >>> from teradataml import DataFrame
+         >>> load_example_data("teradataml", "titanic")
+         >>> titanic_data = DataFrame("titanic")
+         # Example 1: Visualize the data using AutoML class.
+         >>> AutoML.visualize(data = titanic_data,
+         ...                  target_column = 'survived',
+         ...                  plot_type = ['heatmap', 'pair', 'histogram', 'target'],
+         ...                  length = 10,
+         ...                  breadth = 8,
+         ...                  max_features = 10,
+         ...                  problem_type = 'classification')
+
+         # Example 2: Visualize the data using AutoDataPrep class.
+         >>> from teradataml import AutoDataPrep
+         >>> obj = AutoDataPrep(task_type="classification")
+         >>> obj.fit(data = titanic_data, target_column = 'survived')
+
+         # Retrieve the data from AutoDataPrep object.
+         >>> datas = obj.get_data()
+
+         >>> AutoDataPrep.visualize(data = datas['lasso_train'],
+         ...                        target_column = 'survived',
+         ...                        plot_type = 'all'
+         ...                        length = 20,
+         ...                        breadth = 15)
+     """
+     _FeatureExplore._visualize(**kwargs)
+
  @staticmethod
  def generate_custom_config(file_name = "custom"):
      """
@@ -1892,7 +2055,7 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model


  def _regression(self,
-                 model_list
+                 model_list=None,
                  auto = False,
                  verbose = 0,
                  max_runtime_secs = None,
@@ -1969,13 +2132,14 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
  RETURNS:
      a tuple containing, model information and leaderboard.
  """
+
  # Feature Exploration Phase
  _FeatureExplore.__init__(self,
                           data = self.data,
                           target_column = self.target_column,
                           verbose=verbose)
  if verbose > 0:
-     self._exploration()
+     self._exploration(**kwargs)
  # Feature Engineering Phase
  _FeatureEngineering.__init__(self,
                               data = self.data,
@@ -1986,7 +2150,8 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
  **kwargs)
  # Start time
  start_time = time.time()
- data, excluded_columns, target_label
+ data, excluded_columns, target_label,\
+     data_transformation_params, data_mapping = self.feature_engineering(auto)

  # Data preparation Phase
  _DataPreparation.__init__(self,
@@ -1996,8 +2161,18 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
  excluded_columns = excluded_columns,
  custom_data = self.custom_data,
  data_transform_dict = data_transformation_params,
+ data_mapping = data_mapping,
  **kwargs)
- features, data_transformation_params
+ features, data_transformation_params,\
+     data_mapping = self.data_preparation(auto)
+
+ if kwargs.get('auto_dataprep', False):
+     models_info = None
+     leaderboard = None
+     target_count = None
+     return (models_info, leaderboard,
+             target_count, target_label,
+             data_transformation_params, data_mapping)

  # Calculating max_runtime_secs for model training by,
  # subtracting the time taken for feature engineering and data preparation
@@ -2019,12 +2194,14 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
  custom_data = self.custom_data,
  **kwargs)
  models_info, leaderboard, target_count = self.model_training(auto = auto,
- …
+                                                               max_runtime_secs = max_runtime_secs,
+                                                               stopping_metric = stopping_metric,
+                                                               stopping_tolerance = stopping_tolerance,
+                                                               max_models = max_models)

- return (models_info, leaderboard,
+ return (models_info, leaderboard,
+         target_count, target_label,
+         data_transformation_params, data_mapping)

  class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):

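Together with the matching _classification() changes below, _regression() now returns a six-element tuple instead of the old three-element result, and the new auto_dataprep flag short-circuits the pipeline right after data preparation. A hedged sketch of how a caller might unpack that result, based only on the return statements visible in this diff; the wrapper function and its argument choices are assumptions for illustration:

# Illustrative only: `clf` stands for a _Regression (or _Classification) instance
# created the way AutoML.fit() does earlier in this diff.
def run_data_prep_only(clf, **kwargs):
    (models_info, leaderboard, target_count,
     target_label, data_transformation_params, data_mapping) = clf._regression(
        auto=True, verbose=0, auto_dataprep=True, **kwargs)

    # With auto_dataprep=True the pipeline stops before model training, so the
    # model-related slots come back as None and only the data preparation outputs
    # (target_label, transformation params, and the table mapping with keys such as
    # 'lasso_train', 'rfe_train', 'pca_train') are populated.
    assert models_info is None and leaderboard is None and target_count is None
    return target_label, data_transformation_params, data_mapping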
@@ -2057,7 +2234,7 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
  self.custom_data = custom_data

  def _classification(self,
-                     model_list
+                     model_list=None,
                      auto = False,
                      verbose = 0,
                      max_runtime_secs = None,
@@ -2134,14 +2311,16 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
  RETURNS:
      a tuple containing, model information and leaderboard.
  """
+

  # Feature Exploration Phase
  _FeatureExplore.__init__(self,
- …
+                          data = self.data,
+                          target_column = self.target_column,
+                          verbose=verbose,
+                          task_type = "classification")
  if verbose > 0:
-     self._exploration()
+     self._exploration(**kwargs)
  # Feature Engineeting Phase
  _FeatureEngineering.__init__(self,
                               data = self.data,
@@ -2153,7 +2332,9 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
  **kwargs)
  # Start time
  start_time = time.time()
- data, excluded_columns, target_label
+ data, excluded_columns, target_label,\
+     data_transformation_params, data_mapping = self.feature_engineering(auto)
+
  # Data Preparation Phase
  _DataPreparation.__init__(self,
                            data = self.data,
@@ -2163,8 +2344,19 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
  custom_data = self.custom_data,
  data_transform_dict = data_transformation_params,
  task_type = "Classification",
+ data_mapping = data_mapping,
  **kwargs)
- …
+
+ features, data_transformation_params, \
+     data_mapping = self.data_preparation(auto)
+
+ if kwargs.get('auto_dataprep', False):
+     models_info = None
+     leaderboard = None
+     target_count = None
+     return (models_info, leaderboard,
+             target_count, target_label,
+             data_transformation_params, data_mapping)

  # Calculating max_runtime_secs for model training by,
  # subtracting the time taken for feature engineering and data preparation
@@ -2186,28 +2378,14 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
  custom_data = self.custom_data,
  **kwargs)
  models_info, leaderboard, target_count = self.model_training(auto = auto,
- …
+                                                               max_runtime_secs = max_runtime_secs,
+                                                               stopping_metric = stopping_metric,
+                                                               stopping_tolerance = stopping_tolerance,
+                                                               max_models = max_models)

- return (models_info, leaderboard,
- …
- """
- DESCRIPTION:
-     Internal function displays the target column distribution of Target column/ Response column.
- """
- # If data visualization libraries are available
- if self._check_visualization_libraries() and not _is_terminal():
-     import matplotlib.pyplot as plt
-     import seaborn as sns
-     self._display_msg(msg='\nTarget Column Distribution:',
-                       show_data=True)
-     plt.figure(figsize=(6, 6))
-     # Ploting a histogram for target column
-     sns.countplot(data=self.data.select([self.target_column]).to_pandas(), x=self.target_column)
-     plt.show()
+ return (models_info, leaderboard,
+         target_count, target_label,
+         data_transformation_params, data_mapping)

  def _check_data_imbalance(self,
                            data=None):
@@ -2435,6 +2613,9 @@ class AutoRegressor(AutoML):
  results are persisted in a table; otherwise,
  results are garbage collected at the end of the
  session.
+ Note:
+     * User is responsible for cleanup of the persisted tables. List of persisted tables
+       in current session can be viewed using get_persisted_tables() method.
  Default Value: False
  Types: bool

@@ -2675,6 +2856,9 @@ class AutoClassifier(AutoML):
  results are persisted in a table; otherwise,
  results are garbage collected at the end of the
  session.
+ Note:
+     * User is responsible for cleanup of the persisted tables. List of persisted tables
+       in current session can be viewed using get_persisted_tables() method.
  Default Value: False
  Types: bool

@@ -2904,4 +3088,4 @@ class AutoClassifier(AutoML):
  stopping_tolerance=self.stopping_tolerance,
  max_models=self.max_models,
  custom_config_file=self.custom_config_file,
- **kwargs)
+ **kwargs)