teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +71 -0
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +51 -24
- teradataml/analytics/json_parser/utils.py +11 -17
- teradataml/automl/__init__.py +103 -48
- teradataml/automl/data_preparation.py +55 -37
- teradataml/automl/data_transformation.py +131 -69
- teradataml/automl/feature_engineering.py +117 -185
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +13 -25
- teradataml/automl/model_training.py +214 -75
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +11 -6
- teradataml/common/garbagecollector.py +5 -0
- teradataml/common/messagecodes.py +3 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/utils.py +6 -0
- teradataml/context/context.py +49 -29
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/glm_example.json +28 -1
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +20 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
- teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
- teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
- teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
- teradataml/data/teradataml_example.json +77 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +120 -61
- teradataml/dataframe/dataframe.py +102 -17
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +272 -89
- teradataml/dataframe/sql.py +84 -0
- teradataml/dbutils/dbutils.py +2 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
- teradataml/options/__init__.py +13 -4
- teradataml/options/configure.py +27 -6
- teradataml/scriptmgmt/UserEnv.py +19 -16
- teradataml/scriptmgmt/lls_utils.py +117 -14
- teradataml/table_operators/Script.py +2 -3
- teradataml/table_operators/TableOperator.py +58 -10
- teradataml/utils/validators.py +40 -2
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
teradataml/automl/__init__.py
CHANGED
|
@@ -30,6 +30,7 @@ from teradataml.utils.dtypes import _Dtypes
|
|
|
30
30
|
from teradataml.common.utils import UtilFuncs
|
|
31
31
|
from teradataml import TeradataMlException
|
|
32
32
|
from teradataml.common.messages import Messages, MessageCodes
|
|
33
|
+
from teradatasqlalchemy.telemetry.queryband import collect_queryband
|
|
33
34
|
|
|
34
35
|
# AutoML Internal libraries
|
|
35
36
|
from teradataml.automl.data_preparation import _DataPreparation
|
|
@@ -51,6 +52,7 @@ class AutoML:
|
|
|
51
52
|
max_runtime_secs = None,
|
|
52
53
|
stopping_metric = None,
|
|
53
54
|
stopping_tolerance = None,
|
|
55
|
+
max_models = None,
|
|
54
56
|
custom_config_file = None):
|
|
55
57
|
"""
|
|
56
58
|
DESCRIPTION:
|
|
@@ -82,12 +84,12 @@ class AutoML:
|
|
|
82
84
|
AutoML also provides an option to customize the processes within feature
|
|
83
85
|
engineering, data preparation and model training phases. User can customize
|
|
84
86
|
the processes by passing the JSON file path in case of custom run. It also
|
|
85
|
-
supports early stopping of model training based on stopping metrics
|
|
86
|
-
maximum running time.
|
|
87
|
+
supports early stopping of model training based on stopping metrics,
|
|
88
|
+
maximum running time and maximum models to be trained.
|
|
87
89
|
|
|
88
90
|
PARAMETERS:
|
|
89
91
|
task_type:
|
|
90
|
-
Optional
|
|
92
|
+
Optional Argument.
|
|
91
93
|
Specifies the task type for AutoML, whether to apply regression OR classification
|
|
92
94
|
on the provided dataset. If user wants AutoML to decide the task type automatically,
|
|
93
95
|
then it should be set to "Default".
|
|
@@ -122,7 +124,7 @@ class AutoML:
|
|
|
122
124
|
Types: int
|
|
123
125
|
|
|
124
126
|
max_runtime_secs:
|
|
125
|
-
Optional
|
|
127
|
+
Optional Argument.
|
|
126
128
|
Specifies the time limit in seconds for model training.
|
|
127
129
|
Types: int
|
|
128
130
|
|
|
@@ -143,6 +145,11 @@ class AutoML:
|
|
|
143
145
|
Required, when "stopping_metric" is set, otherwise optional.
|
|
144
146
|
Specifies the stopping tolerance for stopping metrics in model training.
|
|
145
147
|
Types: float
|
|
148
|
+
|
|
149
|
+
max_models:
|
|
150
|
+
Optional Argument.
|
|
151
|
+
Specifies the maximum number of models to be trained.
|
|
152
|
+
Types: int
|
|
146
153
|
|
|
147
154
|
custom_config_file:
|
|
148
155
|
Optional Argument.
|
|
@@ -285,6 +292,7 @@ class AutoML:
|
|
|
285
292
|
>>> exclude="xgboost",
|
|
286
293
|
>>> stopping_metric="R2",
|
|
287
294
|
>>> stopping_tolerance=0.7,
|
|
295
|
+
>>> max_models=10,
|
|
288
296
|
>>> custom_config_file="custom_housing.json")
|
|
289
297
|
# Fit the data.
|
|
290
298
|
>>> automl_obj.fit(housing_train, "price")
|
|
@@ -303,7 +311,8 @@ class AutoML:
|
|
|
303
311
|
# Create instance of AutoML.
|
|
304
312
|
>>> automl_obj = AutoML(verbose=2,
|
|
305
313
|
>>> exclude="xgboost",
|
|
306
|
-
>>> max_runtime_secs=500
|
|
314
|
+
>>> max_runtime_secs=500,
|
|
315
|
+
>>> max_models=3)
|
|
307
316
|
# Fit the data.
|
|
308
317
|
>>> automl_obj.fit(iris_input, iris_input.species)
|
|
309
318
|
|
|
@@ -339,6 +348,7 @@ class AutoML:
|
|
|
339
348
|
'WEIGHTED-PRECISION','WEIGHTED-RECALL',
|
|
340
349
|
'WEIGHTED-F1', 'ACCURACY']])
|
|
341
350
|
arg_info_matrix.append(["stopping_tolerance", stopping_tolerance, True, (float, int)])
|
|
351
|
+
arg_info_matrix.append(["max_models", max_models, True, (int)])
|
|
342
352
|
arg_info_matrix.append(["custom_config_file", custom_config_file, True, (str), True])
|
|
343
353
|
|
|
344
354
|
|
|
@@ -349,6 +359,8 @@ class AutoML:
|
|
|
349
359
|
_Validators._validate_mutually_exclusive_arguments(include, "include", exclude, "exclude")
|
|
350
360
|
# Validate mutually inclusive arguments
|
|
351
361
|
_Validators._validate_mutually_inclusive_arguments(stopping_metric, "stopping_metric", stopping_tolerance, "stopping_tolerance")
|
|
362
|
+
# Validate lower range for max_models
|
|
363
|
+
_Validators._validate_argument_range(max_models, "max_models", lbound=1, lbound_inclusive=True)
|
|
352
364
|
|
|
353
365
|
custom_data = None
|
|
354
366
|
self.auto = True
|
|
@@ -375,10 +387,12 @@ class AutoML:
|
|
|
375
387
|
self.max_runtime_secs = max_runtime_secs
|
|
376
388
|
self.stopping_metric = stopping_metric
|
|
377
389
|
self.stopping_tolerance = stopping_tolerance
|
|
390
|
+
self.max_models = max_models
|
|
378
391
|
self.model_list = ['decision_forest', 'xgboost', 'knn', 'svm', 'glm']
|
|
379
392
|
self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
|
|
380
393
|
self._is_fit_called = False
|
|
381
394
|
|
|
395
|
+
@collect_queryband(queryband="AutoML_fit")
|
|
382
396
|
def fit(self,
|
|
383
397
|
data,
|
|
384
398
|
target_column):
|
|
@@ -394,7 +408,7 @@ class AutoML:
|
|
|
394
408
|
Types: teradataml Dataframe
|
|
395
409
|
|
|
396
410
|
target_column:
|
|
397
|
-
Required
|
|
411
|
+
Required Argument.
|
|
398
412
|
Specifies target column of dataset.
|
|
399
413
|
Types: str or ColumnExpression
|
|
400
414
|
|
|
@@ -484,7 +498,7 @@ class AutoML:
|
|
|
484
498
|
|
|
485
499
|
# Displaying received custom input
|
|
486
500
|
if self.custom_data:
|
|
487
|
-
print("\
|
|
501
|
+
print("\nReceived below input for customization : ")
|
|
488
502
|
print(json.dumps(self.custom_data, indent=4))
|
|
489
503
|
|
|
490
504
|
# Classification problem
|
|
@@ -506,13 +520,15 @@ class AutoML:
|
|
|
506
520
|
verbose = self.verbose,
|
|
507
521
|
max_runtime_secs = self.max_runtime_secs,
|
|
508
522
|
stopping_metric = self.stopping_metric,
|
|
509
|
-
stopping_tolerance = self.stopping_tolerance
|
|
510
|
-
)
|
|
523
|
+
stopping_tolerance = self.stopping_tolerance,
|
|
524
|
+
max_models = self.max_models)
|
|
525
|
+
|
|
511
526
|
# Model Evaluation Phase
|
|
512
527
|
self.m_evaluator = _ModelEvaluator(self.model_info,
|
|
513
528
|
self.target_column,
|
|
514
529
|
self.task_type)
|
|
515
530
|
|
|
531
|
+
@collect_queryband(queryband="AutoML_predict")
|
|
516
532
|
def predict(self,
|
|
517
533
|
data = None,
|
|
518
534
|
rank = 1):
|
|
@@ -582,6 +598,10 @@ class AutoML:
|
|
|
582
598
|
|
|
583
599
|
# Validate argument types
|
|
584
600
|
_Validators._validate_function_arguments(arg_info_pred_matrix)
|
|
601
|
+
# Validate range for model rank
|
|
602
|
+
_Validators._validate_argument_range(rank, "rank", lbound=1,
|
|
603
|
+
ubound=self.leader_board.Rank.max(),
|
|
604
|
+
lbound_inclusive=True, ubound_inclusive=True)
|
|
585
605
|
|
|
586
606
|
# Setting test data indicator to default value, i.e., False.
|
|
587
607
|
self.test_data_ind = False
|
|
@@ -633,7 +653,7 @@ class AutoML:
|
|
|
633
653
|
if self.is_classification_type() and self.target_label is not None:
|
|
634
654
|
# Displaying target column labels
|
|
635
655
|
tar_dct = {}
|
|
636
|
-
print('
|
|
656
|
+
print('\nTarget Column Mapping:')
|
|
637
657
|
# Iterating rows
|
|
638
658
|
for row in self.target_label.result.itertuples():
|
|
639
659
|
# Retrieving the category names of encoded target column
|
|
@@ -645,13 +665,13 @@ class AutoML:
|
|
|
645
665
|
for key, value in tar_dct.items():
|
|
646
666
|
print(f"{key}: {value}")
|
|
647
667
|
|
|
648
|
-
print("\
|
|
668
|
+
print("\nPrediction : ")
|
|
649
669
|
print(pred.result)
|
|
650
670
|
|
|
651
671
|
# Showing performance metrics if there is no test data
|
|
652
672
|
# Or if target column is present in test data.
|
|
653
673
|
if not self.test_data_ind or self.target_column_ind:
|
|
654
|
-
print("\
|
|
674
|
+
print("\nPerformance Metrics : ")
|
|
655
675
|
print(metrics.result)
|
|
656
676
|
|
|
657
677
|
prediction_column = 'prediction' if 'prediction' in pred.result.columns else 'Prediction'
|
|
@@ -669,19 +689,20 @@ class AutoML:
|
|
|
669
689
|
}
|
|
670
690
|
# Fitting ROC
|
|
671
691
|
roc_out = ROC(**fit_params)
|
|
672
|
-
print("\
|
|
692
|
+
print("\nROC-AUC : ")
|
|
673
693
|
print_data(roc_out.result)
|
|
674
694
|
print_data(roc_out.output_data)
|
|
675
695
|
|
|
676
696
|
# Displaying confusion matrix for binary and multiclass classification
|
|
677
697
|
prediction_df=pred.result.to_pandas()
|
|
678
698
|
target_col = self.target_column
|
|
679
|
-
print("\
|
|
699
|
+
print("\nConfusion Matrix : ")
|
|
680
700
|
print_data(confusion_matrix(prediction_df[target_col], prediction_df[prediction_column]))
|
|
681
701
|
|
|
682
702
|
# Returning prediction
|
|
683
703
|
return pred.result
|
|
684
704
|
|
|
705
|
+
@collect_queryband(queryband="AutoML_leaderboard")
|
|
685
706
|
def leaderboard(self):
|
|
686
707
|
"""
|
|
687
708
|
DESCRIPTION:
|
|
@@ -709,6 +730,7 @@ class AutoML:
|
|
|
709
730
|
raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
|
|
710
731
|
return self.leader_board
|
|
711
732
|
|
|
733
|
+
@collect_queryband(queryband="AutoML_leader")
|
|
712
734
|
def leader(self):
|
|
713
735
|
"""
|
|
714
736
|
DESCRIPTION:
|
|
@@ -810,12 +832,12 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
|
|
|
810
832
|
Types: teradataml Dataframe
|
|
811
833
|
|
|
812
834
|
target_column:
|
|
813
|
-
Required
|
|
835
|
+
Required Argument.
|
|
814
836
|
Specifies the name of the target column in "data".
|
|
815
837
|
Types: str
|
|
816
838
|
|
|
817
839
|
custom_data:
|
|
818
|
-
Optional
|
|
840
|
+
Optional Argument.
|
|
819
841
|
Specifies json object containing user customized input.
|
|
820
842
|
Types: json object
|
|
821
843
|
"""
|
|
@@ -830,14 +852,15 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
|
|
|
830
852
|
verbose = 0,
|
|
831
853
|
max_runtime_secs = None,
|
|
832
854
|
stopping_metric = None,
|
|
833
|
-
stopping_tolerance = None
|
|
855
|
+
stopping_tolerance = None,
|
|
856
|
+
max_models = None):
|
|
834
857
|
"""
|
|
835
858
|
DESCRIPTION:
|
|
836
859
|
Internal Function runs Regression.
|
|
837
860
|
|
|
838
861
|
PARAMETERS:
|
|
839
862
|
auto:
|
|
840
|
-
Optional
|
|
863
|
+
Optional Argument.
|
|
841
864
|
Specifies whether to run AutoML in custom mode or auto mode.
|
|
842
865
|
When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
|
|
843
866
|
Types: bool
|
|
@@ -853,19 +876,24 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
|
|
|
853
876
|
Types: int
|
|
854
877
|
|
|
855
878
|
max_runtime_secs:
|
|
856
|
-
Optional
|
|
879
|
+
Optional Argument.
|
|
857
880
|
Specifies the time limit in seconds for model training.
|
|
858
881
|
Types: int
|
|
859
882
|
|
|
860
883
|
stopping_metric:
|
|
861
884
|
Required, when "stopping_tolerance" is set, otherwise optional.
|
|
862
|
-
|
|
885
|
+
Specifies the stopping metrics for stopping tolerance in model training.
|
|
863
886
|
Types: str
|
|
864
887
|
|
|
865
888
|
stopping_tolerance:
|
|
866
889
|
Required, when "stopping_metric" is set, otherwise optional.
|
|
867
|
-
|
|
890
|
+
Specifies the stopping tolerance for stopping metrics in model training.
|
|
868
891
|
Types: float
|
|
892
|
+
|
|
893
|
+
max_models:
|
|
894
|
+
Optional Argument.
|
|
895
|
+
Specifies the maximum number of models to be trained.
|
|
896
|
+
Types: int
|
|
869
897
|
|
|
870
898
|
RETURNS:
|
|
871
899
|
a tuple containing, model information and leaderboard.
|
|
@@ -919,7 +947,8 @@ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _Model
|
|
|
919
947
|
models_info, leaderboard, target_count = self.model_training(auto = auto,
|
|
920
948
|
max_runtime_secs = max_runtime_secs,
|
|
921
949
|
stopping_metric = stopping_metric,
|
|
922
|
-
stopping_tolerance = stopping_tolerance
|
|
950
|
+
stopping_tolerance = stopping_tolerance,
|
|
951
|
+
max_models = max_models)
|
|
923
952
|
|
|
924
953
|
return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
|
|
925
954
|
|
|
@@ -940,12 +969,12 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
|
|
|
940
969
|
Types: teradataml Dataframe
|
|
941
970
|
|
|
942
971
|
target_column:
|
|
943
|
-
Required
|
|
972
|
+
Required Argument.
|
|
944
973
|
Specifies the name of the target column in "data".
|
|
945
974
|
Types: str
|
|
946
975
|
|
|
947
976
|
custom_data:
|
|
948
|
-
Optional
|
|
977
|
+
Optional Argument.
|
|
949
978
|
Specifies json object containing user customized input.
|
|
950
979
|
Types: json object
|
|
951
980
|
"""
|
|
@@ -959,14 +988,15 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
|
|
|
959
988
|
verbose = 0,
|
|
960
989
|
max_runtime_secs = None,
|
|
961
990
|
stopping_metric = None,
|
|
962
|
-
stopping_tolerance = None
|
|
991
|
+
stopping_tolerance = None,
|
|
992
|
+
max_models = None):
|
|
963
993
|
"""
|
|
964
994
|
DESCRIPTION:
|
|
965
995
|
Internal Function runs Classification.
|
|
966
996
|
|
|
967
997
|
PARAMETERS:
|
|
968
998
|
auto:
|
|
969
|
-
Optional
|
|
999
|
+
Optional Argument.
|
|
970
1000
|
Specifies whether to run AutoML in custom mode or auto mode.
|
|
971
1001
|
When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
|
|
972
1002
|
Types: bool
|
|
@@ -982,7 +1012,7 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
|
|
|
982
1012
|
Types: int
|
|
983
1013
|
|
|
984
1014
|
max_runtime_secs:
|
|
985
|
-
Optional
|
|
1015
|
+
Optional Argument.
|
|
986
1016
|
Specifies the time limit in seconds for model training.
|
|
987
1017
|
Types: int
|
|
988
1018
|
|
|
@@ -995,6 +1025,11 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
|
|
|
995
1025
|
Required, when "stopping_metric" is set, otherwise optional.
|
|
996
1026
|
Specifies the stopping tolerance for stopping metrics in model training.
|
|
997
1027
|
Types: float
|
|
1028
|
+
|
|
1029
|
+
max_models:
|
|
1030
|
+
Optional Argument.
|
|
1031
|
+
Specifies the maximum number of models to be trained.
|
|
1032
|
+
Types: int
|
|
998
1033
|
|
|
999
1034
|
RETURNS:
|
|
1000
1035
|
a tuple containing, model information and leaderboard.
|
|
@@ -1051,7 +1086,8 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
|
|
|
1051
1086
|
models_info, leaderboard, target_count = self.model_training(auto = auto,
|
|
1052
1087
|
max_runtime_secs = max_runtime_secs,
|
|
1053
1088
|
stopping_metric = stopping_metric,
|
|
1054
|
-
stopping_tolerance = stopping_tolerance
|
|
1089
|
+
stopping_tolerance = stopping_tolerance,
|
|
1090
|
+
max_models = max_models)
|
|
1055
1091
|
|
|
1056
1092
|
return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
|
|
1057
1093
|
|
|
@@ -1166,7 +1202,7 @@ class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _M
|
|
|
1166
1202
|
min_label_count = min(data[self.target_column].value_counts())
|
|
1167
1203
|
if self._data_sampling_method == 'SMOTE':
|
|
1168
1204
|
n_neighbors = min(5, min_label_count - 1)
|
|
1169
|
-
sampling_method = SMOTE(k_neighbors=n_neighbors, random_state=
|
|
1205
|
+
sampling_method = SMOTE(k_neighbors=n_neighbors, random_state=42)
|
|
1170
1206
|
else:
|
|
1171
1207
|
n_neighbors = min(3, min_label_count)
|
|
1172
1208
|
sampling_method = NearMiss(version=1, n_neighbors=n_neighbors)
|
|
@@ -1206,6 +1242,7 @@ class AutoRegressor(AutoML):
|
|
|
1206
1242
|
max_runtime_secs=None,
|
|
1207
1243
|
stopping_metric=None,
|
|
1208
1244
|
stopping_tolerance=None,
|
|
1245
|
+
max_models=None,
|
|
1209
1246
|
custom_config_file=None
|
|
1210
1247
|
):
|
|
1211
1248
|
"""
|
|
@@ -1239,7 +1276,7 @@ class AutoRegressor(AutoML):
|
|
|
1239
1276
|
Types: int
|
|
1240
1277
|
|
|
1241
1278
|
max_runtime_secs:
|
|
1242
|
-
Optional
|
|
1279
|
+
Optional Argument.
|
|
1243
1280
|
Specifies the time limit in seconds for model training.
|
|
1244
1281
|
Types: int
|
|
1245
1282
|
|
|
@@ -1260,6 +1297,11 @@ class AutoRegressor(AutoML):
|
|
|
1260
1297
|
Required, when "stopping_metric" is set, otherwise optional.
|
|
1261
1298
|
Specifies the stopping tolerance for stopping metrics in model training.
|
|
1262
1299
|
Types: float
|
|
1300
|
+
|
|
1301
|
+
max_models:
|
|
1302
|
+
Optional Argument.
|
|
1303
|
+
Specifies the maximum number of models to be trained.
|
|
1304
|
+
Types: int
|
|
1263
1305
|
|
|
1264
1306
|
custom_config_file:
|
|
1265
1307
|
Optional Argument.
|
|
@@ -1325,10 +1367,11 @@ class AutoRegressor(AutoML):
|
|
|
1325
1367
|
|
|
1326
1368
|
# Create instance of AutoRegressor.
|
|
1327
1369
|
>>> automl_obj = AutoRegressor(verbose=2,
|
|
1328
|
-
>>>
|
|
1329
|
-
>>>
|
|
1330
|
-
>>>
|
|
1331
|
-
>>>
|
|
1370
|
+
>>> exclude="xgboost",
|
|
1371
|
+
>>> stopping_metric="R2",
|
|
1372
|
+
>>> stopping_tolerance=0.7,
|
|
1373
|
+
>>> max_models=10,
|
|
1374
|
+
>>> custom_config_file="custom_housing.json")
|
|
1332
1375
|
# Fit the data.
|
|
1333
1376
|
>>> automl_obj.fit(housing_train, "price")
|
|
1334
1377
|
|
|
@@ -1345,8 +1388,8 @@ class AutoRegressor(AutoML):
|
|
|
1345
1388
|
|
|
1346
1389
|
# Create instance of AutoRegressor.
|
|
1347
1390
|
>>> automl_obj = AutoRegressor(verbose=2,
|
|
1348
|
-
>>>
|
|
1349
|
-
>>>
|
|
1391
|
+
>>> exclude="xgboost",
|
|
1392
|
+
>>> max_runtime_secs=500)
|
|
1350
1393
|
# Fit the data.
|
|
1351
1394
|
>>> automl_obj.fit(housing_train, "price")
|
|
1352
1395
|
|
|
@@ -1368,6 +1411,7 @@ class AutoRegressor(AutoML):
|
|
|
1368
1411
|
self.max_runtime_secs = max_runtime_secs
|
|
1369
1412
|
self.stopping_metric = stopping_metric
|
|
1370
1413
|
self.stopping_tolerance = stopping_tolerance
|
|
1414
|
+
self.max_models = max_models
|
|
1371
1415
|
self.custom_config_file = custom_config_file
|
|
1372
1416
|
self.task_type = "Regression"
|
|
1373
1417
|
self.include = include
|
|
@@ -1380,6 +1424,7 @@ class AutoRegressor(AutoML):
|
|
|
1380
1424
|
max_runtime_secs=self.max_runtime_secs,
|
|
1381
1425
|
stopping_metric=self.stopping_metric,
|
|
1382
1426
|
stopping_tolerance=self.stopping_tolerance,
|
|
1427
|
+
max_models=self.max_models,
|
|
1383
1428
|
custom_config_file=self.custom_config_file)
|
|
1384
1429
|
class AutoClassifier(AutoML):
|
|
1385
1430
|
|
|
@@ -1390,6 +1435,7 @@ class AutoClassifier(AutoML):
|
|
|
1390
1435
|
max_runtime_secs=None,
|
|
1391
1436
|
stopping_metric=None,
|
|
1392
1437
|
stopping_tolerance=None,
|
|
1438
|
+
max_models=None,
|
|
1393
1439
|
custom_config_file=None
|
|
1394
1440
|
):
|
|
1395
1441
|
"""
|
|
@@ -1423,18 +1469,13 @@ class AutoClassifier(AutoML):
|
|
|
1423
1469
|
Types: int
|
|
1424
1470
|
|
|
1425
1471
|
max_runtime_secs:
|
|
1426
|
-
Optional
|
|
1472
|
+
Optional Argument.
|
|
1427
1473
|
Specifies the time limit in seconds for model training.
|
|
1428
1474
|
Types: int
|
|
1429
1475
|
|
|
1430
1476
|
stopping_metric:
|
|
1431
1477
|
Required, when "stopping_tolerance" is set, otherwise optional.
|
|
1432
1478
|
Specifies the stopping metrics for stopping tolerance in model training.
|
|
1433
|
-
Types: str
|
|
1434
|
-
|
|
1435
|
-
stopping_tolerance:
|
|
1436
|
-
Required, when "stopping_metric" is set, otherwise optional.
|
|
1437
|
-
Specifies the stopping tolerance for stopping metrics in model training.
|
|
1438
1479
|
Permitted Values:
|
|
1439
1480
|
* For task_type "Regression": "R2", "MAE", "MSE", "MSLE",
|
|
1440
1481
|
"RMSE", "RMSLE"
|
|
@@ -1443,7 +1484,17 @@ class AutoClassifier(AutoML):
|
|
|
1443
1484
|
'MICRO-PRECISION', 'MACRO-PRECISION',
|
|
1444
1485
|
'WEIGHTED-PRECISION','WEIGHTED-RECALL',
|
|
1445
1486
|
'WEIGHTED-F1', 'ACCURACY'
|
|
1487
|
+
Types: str
|
|
1488
|
+
|
|
1489
|
+
stopping_tolerance:
|
|
1490
|
+
Required, when "stopping_metric" is set, otherwise optional.
|
|
1491
|
+
Specifies the stopping tolerance for stopping metrics in model training.
|
|
1446
1492
|
Types: float
|
|
1493
|
+
|
|
1494
|
+
max_models:
|
|
1495
|
+
Optional Argument.
|
|
1496
|
+
Specifies the maximum number of models to be trained.
|
|
1497
|
+
Types: int
|
|
1447
1498
|
|
|
1448
1499
|
custom_config_file:
|
|
1449
1500
|
Optional Argument.
|
|
@@ -1570,10 +1621,11 @@ class AutoClassifier(AutoML):
|
|
|
1570
1621
|
|
|
1571
1622
|
# Create instance of AutoClassifier.
|
|
1572
1623
|
>>> automl_obj = AutoClassifier(verbose=2,
|
|
1573
|
-
>>>
|
|
1574
|
-
>>>
|
|
1575
|
-
>>>
|
|
1576
|
-
>>>
|
|
1624
|
+
>>> exclude="xgboost",
|
|
1625
|
+
>>> stopping_metric="MICRO-F1",
|
|
1626
|
+
>>> stopping_tolerance=0.7,
|
|
1627
|
+
>>> max_models=8,
|
|
1628
|
+
>>> custom_config_file="custom_titanic.json")
|
|
1577
1629
|
# Fit the data.
|
|
1578
1630
|
>>> automl_obj.fit(titanic, titanic.survived)
|
|
1579
1631
|
|
|
@@ -1590,8 +1642,9 @@ class AutoClassifier(AutoML):
|
|
|
1590
1642
|
|
|
1591
1643
|
# Create instance of AutoClassifier.
|
|
1592
1644
|
>>> automl_obj = AutoClassifier(verbose=2,
|
|
1593
|
-
>>>
|
|
1594
|
-
>>>
|
|
1645
|
+
>>> exclude="xgboost",
|
|
1646
|
+
>>> max_runtime_secs=500,
|
|
1647
|
+
>>> max_models=3)
|
|
1595
1648
|
# Fit the data.
|
|
1596
1649
|
>>> automl_obj.fit(iris_input, iris_input.species)
|
|
1597
1650
|
|
|
@@ -1613,6 +1666,7 @@ class AutoClassifier(AutoML):
|
|
|
1613
1666
|
self.max_runtime_secs = max_runtime_secs
|
|
1614
1667
|
self.stopping_metric = stopping_metric
|
|
1615
1668
|
self.stopping_tolerance = stopping_tolerance
|
|
1669
|
+
self.max_models = max_models
|
|
1616
1670
|
self.custom_config_file = custom_config_file
|
|
1617
1671
|
self.task_type = "Classification"
|
|
1618
1672
|
self.include = include
|
|
@@ -1625,4 +1679,5 @@ class AutoClassifier(AutoML):
|
|
|
1625
1679
|
max_runtime_secs=self.max_runtime_secs,
|
|
1626
1680
|
stopping_metric=self.stopping_metric,
|
|
1627
1681
|
stopping_tolerance=self.stopping_tolerance,
|
|
1682
|
+
max_models=self.max_models,
|
|
1628
1683
|
custom_config_file=self.custom_config_file)
|