teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
teradataml/automl/__init__.py
CHANGED
@@ -30,7 +30,7 @@ from teradataml import ColumnExpression
from teradataml.dataframe.dataframe import DataFrame
from teradataml.utils.utils import execute_sql
from teradataml.utils.validators import _Validators
-from teradataml import ROC, BLOB
+from teradataml import ROC, BLOB, VARCHAR
from teradataml.utils.dtypes import _Dtypes
from teradataml.common.utils import UtilFuncs
from teradataml import TeradataMlException
@@ -94,6 +94,9 @@ class AutoML:
the processes by passing the JSON file path in case of custom run. It also
supports early stopping of model training based on stopping metrics,
maximum running time and maximum models to be trained.
+Note:
+* configure.temp_object_type="VT" follows sequential execution.
+

PARAMETERS:
task_type:
@@ -185,8 +188,17 @@ class AutoML:
results are persisted in a table; otherwise,
results are garbage collected at the end of the
session.
+Note:
+* User is responsible for cleanup of the persisted tables. List of persisted tables
+in current session can be viewed using get_persisted_tables() method.
Default Value: False
Types: bool
+
+seed:
+Optional Argument.
+Specifies the random seed for reproducibility.
+Default Value: 42
+Types: int

RETURNS:
Instance of AutoML.
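The persist note and the new seed argument documented above pair with the get_persisted_tables() accessor added later in this diff. A minimal illustrative sketch of that flow, adapted from the docstring examples in this release (keyword spellings follow those examples and are not an authoritative API reference):

# Sketch adapted from the docstring examples in this diff; illustrative only.
from teradataml import AutoML, DataFrame, load_example_data

load_example_data("teradataml", "titanic")
titanic_data = DataFrame("titanic")

obj = AutoML(verbose=2, max_models=10,
             persist=True,   # keep intermediate tables instead of garbage-collecting them
             seed=42)        # new in 20.0.0.5: fixed seed for reproducible runs
obj.fit(data=titanic_data, target_column="survived")

# With persist=True the caller owns cleanup; this returns the stage-to-table mapping.
print(obj.get_persisted_tables())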
@@ -417,9 +429,11 @@ class AutoML:

volatile = kwargs.get('volatile', False)
persist = kwargs.get('persist', False)
+seed = kwargs.get('seed', 42)

arg_info_matrix.append(["volatile", volatile, True, (bool)])
arg_info_matrix.append(["persist", persist, True, (bool)])
+arg_info_matrix.append(["seed", seed, True, (int)])

# Validate argument types
_Validators._validate_function_arguments(arg_info_matrix)
@@ -465,8 +479,13 @@ class AutoML:
self._is_fit_called = False
self._is_load_model_called = False
self.kwargs = kwargs
-self.table_name_mapping={}
-
+self.table_name_mapping = {}
+# Stores the table name of all intermediate datas
+self._intermediate_table_names={}
+self._auto_dataprep = False
+self._phases = None
+self._progressbar_prefix = "AutoML Running:"
+

@collect_queryband(queryband="AutoML_fit")
def fit(self,
data,
@@ -517,7 +536,7 @@ class AutoML:

# Validate argument types
_Validators._validate_function_arguments(arg_info_fit_matrix)
-
+
# Initializing class variables
self.data = data
self.target_column = target_column
@@ -591,15 +610,25 @@ class AutoML:
clf = task_cls(self.data, self.target_column, self.custom_data)

self.model_info, self.leader_board, self.target_count, self.target_label, \
-self.data_transformation_params, self.
-
-
-
-
-
-
-
-
+self.data_transformation_params, self._intermediate_table_names = getattr(clf, cls_method)(
+model_list = self.model_list,
+auto = self.auto,
+verbose = self.verbose,
+max_runtime_secs = self.max_runtime_secs,
+stopping_metric = self.stopping_metric,
+stopping_tolerance = self.stopping_tolerance,
+max_models = self.max_models,
+auto_dataprep = self._auto_dataprep,
+automl_phases = self._phases,
+progress_prefix = self._progressbar_prefix,
+**self.kwargs)
+
+
+# table_name_mapping stores the table name of all intermediate datas (lasso, rfe, pca)
+# used for training models
+keys_to_extract = ['lasso_train', 'rfe_train', 'pca_train']
+self.table_name_mapping = {key: self._intermediate_table_names[key] for key in keys_to_extract
+if key in self._intermediate_table_names}

# Model Evaluation Phase
self.m_evaluator = _ModelEvaluator(self.model_info,
@@ -669,13 +698,9 @@ class AutoML:
>>> prediction = automl_obj.predict(admissions_test, rank=3, use_loaded_models=True)
>>> prediction
"""
-#
-
-
-"'predict' method", \
-"'fit' or 'load' method must be called before" \
-" running predict.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+# Raise error if fit is not called before predict
+_Validators._validate_dependent_method("predict", ["fit", "load"],
+[self._is_fit_called, self._is_load_model_called])

# Appending predict arguments to list for validation.
arg_info_pred_matrix = []
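predict(), and several later methods in this diff (evaluate, leaderboard, leader, model_hyperparameters, deploy), now delegate the "was fit()/load() called first?" check to _Validators._validate_dependent_method instead of assembling the error message inline. The helper's body is not part of this diff; the following is only a hypothetical sketch inferred from the call sites, to show the shape of the check:

# Hypothetical sketch inferred from the call sites above; the real helper lives in
# teradataml/utils/validators.py and its implementation may differ.
def _validate_dependent_method(method_name, dependent_methods, called_flags):
    # Normalize a single prerequisite/flag into lists so both call styles work.
    if isinstance(dependent_methods, str):
        dependent_methods = [dependent_methods]
    if isinstance(called_flags, bool):
        called_flags = [called_flags]
    # The dependency is satisfied if any of the prerequisite methods was called.
    if not any(called_flags):
        prereqs = " or ".join("'{}'".format(m) for m in dependent_methods)
        raise RuntimeError("{} method must be called before running '{}'."
                           .format(prereqs, method_name))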
@@ -758,11 +783,12 @@ class AutoML:
if self.target_column_ind:
prediction_column = 'prediction' if 'prediction' in pred.result.columns else 'Prediction'
probability_column = 'prob_1'
+pred_target_count = pred.result.drop_duplicate(self.target_column).size
# Displaying confusion matrix and ROC-AUC for classification problem
if self.is_classification_type():
print_data = lambda data: print(data) if _is_terminal() else display(data)
# Displaying ROC-AUC for binary classification
-if self.target_count == 2:
+if self.target_count == 2 and pred_target_count == 2:
fit_params = {
"probability_column" : probability_column,
"observation_column" : self.target_column,
@@ -850,13 +876,10 @@ class AutoML:
>>> evaluation = automl_obj.evaluate(admissions_test, rank=3, use_loaded_models=True)
>>> evaluation
"""
-if
-
-
-
-"'fit' or 'load' method must be called before" \
-" running evaluate.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+# Raising exception if fit or load model is not called before evaluate
+_Validators._validate_dependent_method("evaluate", ["fit", "load"],
+[self._is_fit_called, self._is_load_model_called])
+

# Appending evaluate arguments to list for validation.
arg_info_pred_matrix = []
arg_info_pred_matrix.append(["data", data, False, (DataFrame), True])
@@ -886,8 +909,8 @@ class AutoML:
# as it is required for evaluation.
if self.target_column not in data.columns:
raise TeradataMlException(
-
-
+Messages.get_message(MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE).format(self.target_column),
+MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE)

# Checking if data is already transformed before or not
data_node_id = data._nodeid
@@ -1005,13 +1028,9 @@ class AutoML:
# Generate leaderboard using leaderboard() method on "automl_obj".
>>> automl_obj.leaderboard()
"""
-if not
-
-
-"'leaderboard' method", \
-"'fit' method must be called before" \
-" generating leaderboard.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+# Raise error if fit is not called before leaderboard
+_Validators._validate_dependent_method("leaderboard", "fit", self._is_fit_called)
+

return self.leader_board

@collect_queryband(queryband="AutoML_leader")
@@ -1034,13 +1053,9 @@ class AutoML:
# Display best performing model using leader() method on "automl_obj".
>>> automl_obj.leader()
"""
-if not
-
-
-"'leader' method", \
-"'fit' method must be called before" \
-" generating leader.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+# Raise error if fit is not called before leader
+_Validators._validate_dependent_method("leader", "fit", self._is_fit_called)
+

record = self.leader_board
if not _is_terminal():
display(record[record['RANK'] == 1])
@@ -1113,13 +1128,9 @@ class AutoML:
>>> automl_obj.model_hyperparameters(rank=1)
"""

-if
-
-
-"'model_hyperparameters' method",
-"No models available to get hyperparameters. " \
-"Run 'fit()' or 'load()' methods to get models.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+# Raise error if fit or load model is not called before model_hyperparameters
+_Validators._validate_dependent_method("model_hyperparameters", ["fit", "load"],
+[self._is_fit_called, self._is_load_model_called])

arg_info_matrix = []
arg_info_matrix.append(["rank", rank, True, (int), True])
@@ -1234,6 +1245,8 @@ class AutoML:
pca.n_components_ = load_pca_info['n_components']
pca.noise_variance_ = load_pca_info['noise_variance']
pca.singular_values_ = np.array(load_pca_info['singular_values'])
+pca.feature_names_in_ = data_params['pca_fit_columns']
+pca.n_features_in_ = len(data_params['pca_fit_columns'])

data_params['pca_fit_instance'] = pca

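The two new assignments restore metadata that scikit-learn (1.0 and later) records on estimators fitted against a named DataFrame, so the PCA rebuilt from saved parameters carries the same fitted-feature information as the original. A standalone illustration of where those attributes come from (column names are placeholders, not the AutoML feature names):

# Standalone illustration; column names are placeholders.
import pandas as pd
from sklearn.decomposition import PCA

df = pd.DataFrame({"col_a": [1.0, 2.0, 3.0, 4.0],
                   "col_b": [0.5, 0.1, 0.7, 0.9],
                   "col_c": [3.0, 2.0, 1.0, 0.0]})

pca = PCA(n_components=2).fit(df)
print(pca.feature_names_in_)   # ['col_a' 'col_b' 'col_c'] -- what the loader now restores manually
print(pca.n_features_in_)      # 3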
@@ -1256,28 +1269,18 @@ class AutoML:
start_rank, end_rank = ranks.start, ranks.stop

# Check if both parts are non-negative integers
-
-
-"'deploy' method", \
-"Provided start and end rank in 'ranks' "\
-"must be positive non-zero integers.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+_Validators._validate_positive_int(start_rank, "ranks(start)")
+_Validators._validate_positive_int(end_rank, "ranks(end)")

# Check if start_rank is less than or equal to end_rank
if start_rank > end_rank:
-err =
-
-"Provided start rank in 'ranks' must be less than"\
-" or equal to end rank in 'ranks'.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+err = "Provided start rank in 'ranks' must be less than or equal to end rank in 'ranks'."
+self._raise_error("deploy", err)

# check end rank is less than or equal to total models
if end_rank > self.leader_board.RANK.max():
-err =
-
-"Provided end rank in 'ranks' must be less than"\
-" or equal to total models available.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+err = "Provided end rank in 'ranks' must be less than or equal to total models available."
+self._raise_error("deploy", err)

return start_rank, end_rank

@@ -1342,12 +1345,7 @@ class AutoML:
>>> obj.deploy("model_table", ranks=range(2,6))
"""
# raise Error if fit is not called
-
-err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
-"'deploy' method", \
-"'fit' method must be called before" \
-" 'deploy'.")
-raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+_Validators._validate_dependent_method("deploy", "fit", self._is_fit_called)

# Appending arguments to list for validation
arg_info_matrix = []
@@ -1442,7 +1440,8 @@ class AutoML:
# Saving data transformation parameters to the specified table
sv_models = pd.concat([sv_models, df], ignore_index=True, sort=False)

-copy_to_sql(df = sv_models, table_name=table_name, if_exists='replace', types={'DATA_PARAMS':BLOB
+copy_to_sql(df = sv_models, table_name=table_name, if_exists='replace', types={'DATA_PARAMS':BLOB,
+'PARAMETERS':VARCHAR(length=32000, charset='UNICODE')})

print('Model Deployment Completed Successfully.')

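The widened types mapping pins the saved PARAMETERS column to a 32000-character UNICODE VARCHAR alongside the existing BLOB for DATA_PARAMS, presumably so long serialized hyperparameter strings survive the round trip. A minimal sketch of the same copy_to_sql pattern on a toy frame (table and column values are placeholders; an active teradataml connection is assumed):

# Placeholder data; mirrors the types= mapping used in deploy(). Assumes create_context()
# has already established a connection.
import pandas as pd
from teradataml import copy_to_sql, BLOB, VARCHAR

sv_models = pd.DataFrame({"MODEL_NAME": ["xgb_rank_1"],
                          "PARAMETERS": ["{'max_depth': 6, 'eta': 0.3}"],
                          "DATA_PARAMS": [b"\x00\x01"]})

copy_to_sql(df=sv_models, table_name="automl_models_demo", if_exists="replace",
            types={"DATA_PARAMS": BLOB,
                   "PARAMETERS": VARCHAR(length=32000, charset="UNICODE")})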
@@ -1793,6 +1792,185 @@ class AutoML:

db_drop_table(table_name)

+@collect_queryband(queryband="AutoML_get_persisted_tables")
+def get_persisted_tables(self):
+"""
+DESCRIPTION:
+Get the list of the tables that are persisted in the database.
+Note:
+* User is responsible for keeping track of the persistent tables
+and cleanup of the same if required.
+
+PARAMETERS:
+None
+
+RETURNS:
+Dictionary, containing the list of table names that mapped to the stage
+at which it was generated.
+
+RAISES:
+TeradataMlException.
+
+EXAMPLES:
+# Create an instance of the AutoML called "obj"
+# by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
+# 'persist' argument must be set to True in the AutoML object.
+>>> obj = AutoML(verbose=2, max_models=10, persist=True)
+
+# Load and fit the data.
+>>> load_example_data("teradataml", "titanic")
+>>> titanic_data = DataFrame("titanic")
+>>> obj.fit(data = titanic_data, target_column = titanic.survived)
+
+# Get the list of tables that are persisted in the database.
+>>> obj.get_persisted_tables()
+"""
+# Check if fit is called
+_Validators._validate_dependent_method("get_persisted_tables", "fit", self._is_fit_called)
+
+# check if persist is passed as argument and is set to True
+persist_val = True if self.kwargs.get('persist', False) else None
+
+_Validators._validate_dependent_argument("get_persisted_tables", True,
+"persist", persist_val,
+msg_arg_value='True')
+
+# result table names
+return self._intermediate_table_names
+
+def _raise_error(self, method_name, error_msg):
+"""
+DESCRIPTION:
+Internal Function raises an error message when a method
+fails to execute.
+
+PARAMETERS:
+method_name:
+Required Argument.
+Specifies the method name that failed to execute.
+Types: str
+
+error_msg:
+Required Argument.
+Specifies the error message to be displayed.
+Types: str
+
+RAISES:
+TeradataMlException.
+
+EXAMPLES:
+>>> self._raise_error("fit", "fit() method must be called before 'deploy'.")
+"""
+err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
+f'{method_name} method',
+error_msg)
+raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+
+@staticmethod
+def visualize(**kwargs):
+"""
+DESCRIPTION:
+Function visualizes the data using various plots such as heatmap,
+pair plot, histogram, univariate plot, count plot, box plot, and target distribution.
+
+PARAMETERS:
+data:
+Required Argument.
+Specifies the input teradataml DataFrame for plotting.
+Types: teradataml Dataframe
+
+target_column:
+Required Argument.
+Specifies the name of the target column in "data".
+Note:
+* "target_column" must be of numeric type.
+Types: str
+
+plot_type:
+Optional Argument.
+Specifies the type of plot to be displayed.
+Default Value: "target"
+Permitted Values:
+* "heatmap": Displays a heatmap of feature correlations.
+* "pair": Displays a pair plot of features.
+* "density": Displays a density plot of features.
+* "count": Displays a count plot of categorical features.
+* "box": Displays a box plot of numerical features.
+* "target": Displays the distribution of the target variable.
+* "all": Displays all the plots.
+Types: str, list of str
+
+length:
+Optional Argument.
+Specifies the length of the plot.
+Default Value: 10
+Types: int
+
+breadth:
+Optional Argument.
+Specifies the breadth of the plot.
+Default Value: 8
+Types: int
+
+columns:
+Optional Argument.
+Specifies the column names to be used for plotting.
+Types: str or list of string
+
+max_features:
+Optional Argument.
+Specifies the maximum number of features to be used for plotting.
+Default Value: 10
+Note:
+* It applies separately to categorical and numerical features.
+Types: int
+
+problem_type:
+Optional Argument.
+Specifies the type of problem.
+Permitted Values:
+* 'regression'
+* 'classification'
+Types: str
+
+RETURNS:
+None
+
+RAISES:
+TeradataMlException.
+
+EXAMPLES:
+# Import either of AutoML or AutoClassifier or AutoRegressor or Autodataprep
+# from teradataml.
+>>> from teradataml import AutoML
+>>> from teradataml import DataFrame
+>>> load_example_data("teradataml", "titanic")
+>>> titanic_data = DataFrame("titanic")
+# Example 1: Visualize the data using AutoML class.
+>>> AutoML.visualize(data = titanic_data,
+... target_column = 'survived',
+... plot_type = ['heatmap', 'pair', 'histogram', 'target'],
+... length = 10,
+... breadth = 8,
+... max_features = 10,
+... problem_type = 'classification')
+
+# Example 2: Visualize the data using AutoDataPrep class.
+>>> from teradataml import AutoDataPrep
+>>> obj = AutoDataPrep(task_type="classification")
+>>> obj.fit(data = titanic_data, target_column = 'survived')
+
+# Retrieve the data from AutoDataPrep object.
+>>> datas = obj.get_data()
+
+>>> AutoDataPrep.visualize(data = datas['lasso_train'],
+... target_column = 'survived',
+... plot_type = 'all'
+... length = 20,
+... breadth = 15)
+"""
+_FeatureExplore._visualize(**kwargs)
+
@staticmethod
def generate_custom_config(file_name = "custom"):
"""
@@ -1877,7 +2055,7 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model


def _regression(self,
-model_list
+model_list=None,
auto = False,
verbose = 0,
max_runtime_secs = None,
@@ -1945,16 +2123,23 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
Default Value: False
Types: bool

+seed:
+Optional Argument.
+Specifies the random seed for reproducibility.
+Default Value: 42
+Types: int
+
RETURNS:
a tuple containing, model information and leaderboard.
"""
+
# Feature Exploration Phase
_FeatureExplore.__init__(self,
data = self.data,
target_column = self.target_column,
verbose=verbose)
if verbose > 0:
-self._exploration()
+self._exploration(**kwargs)
# Feature Engineering Phase
_FeatureEngineering.__init__(self,
data = self.data,
@@ -1965,7 +2150,8 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
**kwargs)
# Start time
start_time = time.time()
-data, excluded_columns, target_label
+data, excluded_columns, target_label,\
+data_transformation_params, data_mapping = self.feature_engineering(auto)

# Data preparation Phase
_DataPreparation.__init__(self,
@@ -1975,8 +2161,18 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
excluded_columns = excluded_columns,
custom_data = self.custom_data,
data_transform_dict = data_transformation_params,
+data_mapping = data_mapping,
**kwargs)
-features, data_transformation_params
+features, data_transformation_params,\
+data_mapping = self.data_preparation(auto)
+
+if kwargs.get('auto_dataprep', False):
+models_info = None
+leaderboard = None
+target_count = None
+return (models_info, leaderboard,
+target_count, target_label,
+data_transformation_params, data_mapping)

# Calculating max_runtime_secs for model training by,
# subtracting the time taken for feature engineering and data preparation
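The early return above is the hook the new AutoDataPrep entry point (teradataml/automl/autodataprep/__init__.py in the file list) relies on: when auto_dataprep is set, the pipeline stops after data preparation and hands back the transformation parameters and table mapping instead of training models. Usage, as shown in the visualize() docstring earlier in this diff:

# Taken from the AutoDataPrep example embedded in the visualize() docstring above.
from teradataml import AutoDataPrep, DataFrame, load_example_data

load_example_data("teradataml", "titanic")
titanic_data = DataFrame("titanic")

obj = AutoDataPrep(task_type="classification")
obj.fit(data=titanic_data, target_column="survived")

# Prepared datasets (for example 'lasso_train') keyed by the preparation stage.
datas = obj.get_data()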
@@ -1998,12 +2194,14 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
custom_data = self.custom_data,
**kwargs)
models_info, leaderboard, target_count = self.model_training(auto = auto,
-
-
-
-
+max_runtime_secs = max_runtime_secs,
+stopping_metric = stopping_metric,
+stopping_tolerance = stopping_tolerance,
+max_models = max_models)

-return (models_info, leaderboard,
+return (models_info, leaderboard,
+target_count, target_label,
+data_transformation_params, data_mapping)

class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):

@@ -2036,7 +2234,7 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
self.custom_data = custom_data

def _classification(self,
-model_list
+model_list=None,
auto = False,
verbose = 0,
max_runtime_secs = None,
@@ -2103,18 +2301,26 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
session.
Default Value: False
Types: bool
+
+seed:
+Optional Argument.
+Specifies the random seed for reproducibility.
+Default Value: 42
+Types: int

RETURNS:
a tuple containing, model information and leaderboard.
"""
+

# Feature Exploration Phase
_FeatureExplore.__init__(self,
-
-
-
+data = self.data,
+target_column = self.target_column,
+verbose=verbose,
+task_type = "classification")
if verbose > 0:
-self._exploration()
+self._exploration(**kwargs)
# Feature Engineeting Phase
_FeatureEngineering.__init__(self,
data = self.data,
@@ -2126,7 +2332,9 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
**kwargs)
# Start time
start_time = time.time()
-data, excluded_columns, target_label
+data, excluded_columns, target_label,\
+data_transformation_params, data_mapping = self.feature_engineering(auto)
+
# Data Preparation Phase
_DataPreparation.__init__(self,
data = self.data,
@@ -2136,8 +2344,19 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
custom_data = self.custom_data,
data_transform_dict = data_transformation_params,
task_type = "Classification",
+data_mapping = data_mapping,
**kwargs)
-
+
+features, data_transformation_params, \
+data_mapping = self.data_preparation(auto)
+
+if kwargs.get('auto_dataprep', False):
+models_info = None
+leaderboard = None
+target_count = None
+return (models_info, leaderboard,
+target_count, target_label,
+data_transformation_params, data_mapping)

# Calculating max_runtime_secs for model training by,
# subtracting the time taken for feature engineering and data preparation
@@ -2159,28 +2378,14 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
custom_data = self.custom_data,
**kwargs)
models_info, leaderboard, target_count = self.model_training(auto = auto,
-
-
-
-
+max_runtime_secs = max_runtime_secs,
+stopping_metric = stopping_metric,
+stopping_tolerance = stopping_tolerance,
+max_models = max_models)

-return (models_info, leaderboard,
-
-
-"""
-DESCRIPTION:
-Internal function displays the target column distribution of Target column/ Response column.
-"""
-# If data visualization libraries are available
-if self._check_visualization_libraries() and not _is_terminal():
-import matplotlib.pyplot as plt
-import seaborn as sns
-self._display_msg(msg='\nTarget Column Distribution:',
-show_data=True)
-plt.figure(figsize=(6, 6))
-# Ploting a histogram for target column
-sns.countplot(data=self.data.select([self.target_column]).to_pandas(), x=self.target_column)
-plt.show()
+return (models_info, leaderboard,
+target_count, target_label,
+data_transformation_params, data_mapping)

def _check_data_imbalance(self,
data=None):
@@ -2324,6 +2529,9 @@ class AutoRegressor(AutoML):
"""
DESCRIPTION:
AutoRegressor is a special purpose AutoML feature to run regression specific tasks.
+Note:
+* configure.temp_object_type="VT" follows sequential execution.
+

PARAMETERS:
include:
@@ -2405,8 +2613,17 @@ class AutoRegressor(AutoML):
results are persisted in a table; otherwise,
results are garbage collected at the end of the
session.
+Note:
+* User is responsible for cleanup of the persisted tables. List of persisted tables
+in current session can be viewed using get_persisted_tables() method.
Default Value: False
Types: bool
+
+seed:
+Optional Argument.
+Specifies the random seed for reproducibility.
+Default Value: 42
+Types: int

RETURNS:
Instance of AutoRegressor.
@@ -2555,6 +2772,9 @@ class AutoClassifier(AutoML):
"""
DESCRIPTION:
AutoClassifier is a special purpose AutoML feature to run classification specific tasks.
+Note:
+* configure.temp_object_type="VT" follows sequential execution.
+

PARAMETERS:
include:
@@ -2636,8 +2856,17 @@ class AutoClassifier(AutoML):
results are persisted in a table; otherwise,
results are garbage collected at the end of the
session.
+Note:
+* User is responsible for cleanup of the persisted tables. List of persisted tables
+in current session can be viewed using get_persisted_tables() method.
Default Value: False
Types: bool
+
+seed:
+Optional Argument.
+Specifies the random seed for reproducibility.
+Default Value: 42
+Types: int

RETURNS:
Instance of AutoClassifier.
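AutoClassifier inherits the same persist note and seed argument as the base class; the docstrings describe seed as a random seed for reproducibility. A short sketch of how a caller might rely on that (equality of the two leaderboards is the documented intent, not something verified here):

# Sketch only: fixed-seed reproducibility as described by the new 'seed' docstring.
from teradataml import AutoClassifier, DataFrame, load_example_data

load_example_data("teradataml", "titanic")
titanic_data = DataFrame("titanic")

run_a = AutoClassifier(max_models=3, seed=7)
run_a.fit(data=titanic_data, target_column="survived")

run_b = AutoClassifier(max_models=3, seed=7)
run_b.fit(data=titanic_data, target_column="survived")

# With the same seed, the two runs are expected to produce matching leaderboards.
print(run_a.leaderboard())
print(run_b.leaderboard())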
@@ -2859,4 +3088,4 @@ class AutoClassifier(AutoML):
stopping_tolerance=self.stopping_tolerance,
max_models=self.max_models,
custom_config_file=self.custom_config_file,
-**kwargs)
+**kwargs)