teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (432)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +238 -1
  4. teradataml/__init__.py +13 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/Transformations.py +4 -4
  7. teradataml/analytics/__init__.py +0 -2
  8. teradataml/analytics/analytic_function_executor.py +3 -0
  9. teradataml/analytics/json_parser/utils.py +13 -12
  10. teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
  12. teradataml/analytics/sqle/__init__.py +0 -13
  13. teradataml/analytics/utils.py +1 -0
  14. teradataml/analytics/valib.py +3 -0
  15. teradataml/automl/__init__.py +1628 -0
  16. teradataml/automl/custom_json_utils.py +1270 -0
  17. teradataml/automl/data_preparation.py +993 -0
  18. teradataml/automl/data_transformation.py +727 -0
  19. teradataml/automl/feature_engineering.py +1648 -0
  20. teradataml/automl/feature_exploration.py +547 -0
  21. teradataml/automl/model_evaluation.py +163 -0
  22. teradataml/automl/model_training.py +887 -0
  23. teradataml/catalog/__init__.py +0 -2
  24. teradataml/catalog/byom.py +49 -6
  25. teradataml/catalog/function_argument_mapper.py +0 -2
  26. teradataml/catalog/model_cataloging_utils.py +2 -1021
  27. teradataml/common/aed_utils.py +6 -2
  28. teradataml/common/constants.py +50 -58
  29. teradataml/common/deprecations.py +160 -0
  30. teradataml/common/garbagecollector.py +61 -104
  31. teradataml/common/messagecodes.py +27 -36
  32. teradataml/common/messages.py +11 -15
  33. teradataml/common/utils.py +205 -287
  34. teradataml/common/wrapper_utils.py +1 -110
  35. teradataml/context/context.py +150 -78
  36. teradataml/data/bank_churn.csv +10001 -0
  37. teradataml/data/bmi.csv +501 -0
  38. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
  40. teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
  42. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
  43. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
  44. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
  45. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
  46. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
  47. teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
  48. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
  49. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
  50. teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
  51. teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
  52. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
  53. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
  54. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
  55. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
  56. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
  57. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
  58. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
  59. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
  60. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
  61. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
  62. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
  63. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
  64. teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
  65. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
  66. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
  67. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
  68. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
  69. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
  70. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
  71. teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
  72. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
  73. teradataml/data/fish.csv +160 -0
  74. teradataml/data/glass_types.csv +215 -0
  75. teradataml/data/insurance.csv +1 -1
  76. teradataml/data/iris_data.csv +151 -0
  77. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
  78. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
  79. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
  80. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
  81. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
  82. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
  83. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
  84. teradataml/data/load_example_data.py +3 -0
  85. teradataml/data/multi_model_classification.csv +401 -0
  86. teradataml/data/multi_model_regression.csv +401 -0
  87. teradataml/data/openml_example.json +63 -0
  88. teradataml/data/scripts/deploy_script.py +65 -0
  89. teradataml/data/scripts/mapper.R +20 -0
  90. teradataml/data/scripts/sklearn/__init__.py +0 -0
  91. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  92. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  93. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  94. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  95. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  96. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  97. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  98. teradataml/data/templates/open_source_ml.json +9 -0
  99. teradataml/data/teradataml_example.json +73 -1
  100. teradataml/data/test_classification.csv +101 -0
  101. teradataml/data/test_prediction.csv +101 -0
  102. teradataml/data/test_regression.csv +101 -0
  103. teradataml/data/train_multiclass.csv +101 -0
  104. teradataml/data/train_regression.csv +101 -0
  105. teradataml/data/train_regression_multiple_labels.csv +101 -0
  106. teradataml/data/wine_data.csv +1600 -0
  107. teradataml/dataframe/copy_to.py +79 -13
  108. teradataml/dataframe/data_transfer.py +8 -0
  109. teradataml/dataframe/dataframe.py +910 -311
  110. teradataml/dataframe/dataframe_utils.py +102 -5
  111. teradataml/dataframe/fastload.py +11 -3
  112. teradataml/dataframe/setop.py +15 -2
  113. teradataml/dataframe/sql.py +3735 -77
  114. teradataml/dataframe/sql_function_parameters.py +56 -5
  115. teradataml/dataframe/vantage_function_types.py +45 -1
  116. teradataml/dataframe/window.py +30 -29
  117. teradataml/dbutils/dbutils.py +18 -1
  118. teradataml/geospatial/geodataframe.py +18 -7
  119. teradataml/geospatial/geodataframecolumn.py +5 -0
  120. teradataml/hyperparameter_tuner/optimizer.py +910 -120
  121. teradataml/hyperparameter_tuner/utils.py +131 -37
  122. teradataml/lib/aed_0_1.dll +0 -0
  123. teradataml/lib/libaed_0_1.dylib +0 -0
  124. teradataml/lib/libaed_0_1.so +0 -0
  125. teradataml/libaed_0_1.dylib +0 -0
  126. teradataml/libaed_0_1.so +0 -0
  127. teradataml/opensource/__init__.py +1 -0
  128. teradataml/opensource/sklearn/__init__.py +1 -0
  129. teradataml/opensource/sklearn/_class.py +255 -0
  130. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  131. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  132. teradataml/opensource/sklearn/constants.py +54 -0
  133. teradataml/options/__init__.py +3 -6
  134. teradataml/options/configure.py +21 -20
  135. teradataml/scriptmgmt/UserEnv.py +61 -5
  136. teradataml/scriptmgmt/lls_utils.py +135 -53
  137. teradataml/table_operators/Apply.py +38 -6
  138. teradataml/table_operators/Script.py +45 -308
  139. teradataml/table_operators/TableOperator.py +182 -591
  140. teradataml/table_operators/__init__.py +0 -1
  141. teradataml/table_operators/table_operator_util.py +32 -40
  142. teradataml/utils/validators.py +127 -3
  143. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
  144. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
  145. teradataml/analytics/mle/AdaBoost.py +0 -651
  146. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  147. teradataml/analytics/mle/Antiselect.py +0 -342
  148. teradataml/analytics/mle/Arima.py +0 -641
  149. teradataml/analytics/mle/ArimaPredict.py +0 -477
  150. teradataml/analytics/mle/Attribution.py +0 -1070
  151. teradataml/analytics/mle/Betweenness.py +0 -658
  152. teradataml/analytics/mle/Burst.py +0 -711
  153. teradataml/analytics/mle/CCM.py +0 -600
  154. teradataml/analytics/mle/CCMPrepare.py +0 -324
  155. teradataml/analytics/mle/CFilter.py +0 -460
  156. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  157. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  158. teradataml/analytics/mle/Closeness.py +0 -737
  159. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  160. teradataml/analytics/mle/Correlation.py +0 -477
  161. teradataml/analytics/mle/Correlation2.py +0 -573
  162. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  163. teradataml/analytics/mle/CoxPH.py +0 -556
  164. teradataml/analytics/mle/CoxSurvival.py +0 -478
  165. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  166. teradataml/analytics/mle/DTW.py +0 -623
  167. teradataml/analytics/mle/DWT.py +0 -564
  168. teradataml/analytics/mle/DWT2D.py +0 -599
  169. teradataml/analytics/mle/DecisionForest.py +0 -716
  170. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  171. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  172. teradataml/analytics/mle/DecisionTree.py +0 -830
  173. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  174. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  175. teradataml/analytics/mle/FMeasure.py +0 -402
  176. teradataml/analytics/mle/FPGrowth.py +0 -734
  177. teradataml/analytics/mle/FrequentPaths.py +0 -695
  178. teradataml/analytics/mle/GLM.py +0 -558
  179. teradataml/analytics/mle/GLML1L2.py +0 -547
  180. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  181. teradataml/analytics/mle/GLMPredict.py +0 -529
  182. teradataml/analytics/mle/HMMDecoder.py +0 -945
  183. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  184. teradataml/analytics/mle/HMMSupervised.py +0 -521
  185. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  186. teradataml/analytics/mle/Histogram.py +0 -561
  187. teradataml/analytics/mle/IDWT.py +0 -476
  188. teradataml/analytics/mle/IDWT2D.py +0 -493
  189. teradataml/analytics/mle/IdentityMatch.py +0 -763
  190. teradataml/analytics/mle/Interpolator.py +0 -918
  191. teradataml/analytics/mle/KMeans.py +0 -485
  192. teradataml/analytics/mle/KNN.py +0 -627
  193. teradataml/analytics/mle/KNNRecommender.py +0 -488
  194. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  195. teradataml/analytics/mle/LAR.py +0 -439
  196. teradataml/analytics/mle/LARPredict.py +0 -478
  197. teradataml/analytics/mle/LDA.py +0 -548
  198. teradataml/analytics/mle/LDAInference.py +0 -492
  199. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  200. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  201. teradataml/analytics/mle/LinReg.py +0 -433
  202. teradataml/analytics/mle/LinRegPredict.py +0 -438
  203. teradataml/analytics/mle/MinHash.py +0 -544
  204. teradataml/analytics/mle/Modularity.py +0 -587
  205. teradataml/analytics/mle/NEREvaluator.py +0 -410
  206. teradataml/analytics/mle/NERExtractor.py +0 -595
  207. teradataml/analytics/mle/NERTrainer.py +0 -458
  208. teradataml/analytics/mle/NGrams.py +0 -570
  209. teradataml/analytics/mle/NPath.py +0 -634
  210. teradataml/analytics/mle/NTree.py +0 -549
  211. teradataml/analytics/mle/NaiveBayes.py +0 -462
  212. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  213. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  214. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  215. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  216. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  217. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  218. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  219. teradataml/analytics/mle/POSTagger.py +0 -417
  220. teradataml/analytics/mle/Pack.py +0 -411
  221. teradataml/analytics/mle/PageRank.py +0 -535
  222. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  223. teradataml/analytics/mle/PathGenerator.py +0 -367
  224. teradataml/analytics/mle/PathStart.py +0 -464
  225. teradataml/analytics/mle/PathSummarizer.py +0 -470
  226. teradataml/analytics/mle/Pivot.py +0 -471
  227. teradataml/analytics/mle/ROC.py +0 -425
  228. teradataml/analytics/mle/RandomSample.py +0 -637
  229. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  230. teradataml/analytics/mle/SAX.py +0 -779
  231. teradataml/analytics/mle/SVMDense.py +0 -677
  232. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  233. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  234. teradataml/analytics/mle/SVMSparse.py +0 -557
  235. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  236. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  237. teradataml/analytics/mle/Sampling.py +0 -549
  238. teradataml/analytics/mle/Scale.py +0 -565
  239. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  240. teradataml/analytics/mle/ScaleMap.py +0 -378
  241. teradataml/analytics/mle/ScaleSummary.py +0 -320
  242. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  243. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  244. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  245. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  246. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  247. teradataml/analytics/mle/Sessionize.py +0 -475
  248. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  249. teradataml/analytics/mle/StringSimilarity.py +0 -425
  250. teradataml/analytics/mle/TF.py +0 -389
  251. teradataml/analytics/mle/TFIDF.py +0 -504
  252. teradataml/analytics/mle/TextChunker.py +0 -414
  253. teradataml/analytics/mle/TextClassifier.py +0 -399
  254. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  255. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  256. teradataml/analytics/mle/TextMorph.py +0 -494
  257. teradataml/analytics/mle/TextParser.py +0 -623
  258. teradataml/analytics/mle/TextTagger.py +0 -530
  259. teradataml/analytics/mle/TextTokenizer.py +0 -502
  260. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  261. teradataml/analytics/mle/Unpack.py +0 -526
  262. teradataml/analytics/mle/Unpivot.py +0 -438
  263. teradataml/analytics/mle/VarMax.py +0 -776
  264. teradataml/analytics/mle/VectorDistance.py +0 -762
  265. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  266. teradataml/analytics/mle/XGBoost.py +0 -842
  267. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  268. teradataml/analytics/mle/__init__.py +0 -123
  269. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  270. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  271. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  272. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  273. teradataml/analytics/mle/json/arima_mle.json +0 -172
  274. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  275. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  276. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  277. teradataml/analytics/mle/json/burst_mle.json +0 -140
  278. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  279. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  280. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  281. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  282. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  283. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  284. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  285. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  286. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  287. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  288. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  289. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  290. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  291. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  292. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  293. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  294. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  295. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  296. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  297. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  298. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  299. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  300. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  301. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  302. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  303. teradataml/analytics/mle/json/glm_mle.json +0 -111
  304. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  305. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  306. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  307. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  308. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  309. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  310. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  311. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  312. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  313. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  314. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  315. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  316. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  317. teradataml/analytics/mle/json/knn_mle.json +0 -141
  318. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  319. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  320. teradataml/analytics/mle/json/lar_mle.json +0 -78
  321. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  322. teradataml/analytics/mle/json/lda_mle.json +0 -130
  323. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  324. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  325. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  326. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  327. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  328. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  329. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  330. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  331. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  332. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  333. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  334. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  335. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  336. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  337. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  338. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  339. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  340. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  341. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  342. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  343. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  344. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  345. teradataml/analytics/mle/json/pack_mle.json +0 -58
  346. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  347. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  348. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  349. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  350. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  351. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  352. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  353. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  354. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  355. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  356. teradataml/analytics/mle/json/roc_mle.json +0 -73
  357. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  358. teradataml/analytics/mle/json/sax_mle.json +0 -154
  359. teradataml/analytics/mle/json/scale_mle.json +0 -93
  360. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  361. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  362. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  363. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  364. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  365. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  366. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  367. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  368. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  369. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  370. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  371. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  372. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  373. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  374. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  375. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  376. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  377. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  378. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  379. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  380. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  381. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  382. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  383. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  384. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  385. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  386. teradataml/analytics/mle/json/tf_mle.json +0 -33
  387. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  388. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  389. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  390. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  391. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  392. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  393. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  394. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  395. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  396. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  397. teradataml/analytics/sqle/Antiselect.py +0 -321
  398. teradataml/analytics/sqle/Attribution.py +0 -603
  399. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  400. teradataml/analytics/sqle/GLMPredict.py +0 -430
  401. teradataml/analytics/sqle/MovingAverage.py +0 -543
  402. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  403. teradataml/analytics/sqle/NPath.py +0 -632
  404. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  405. teradataml/analytics/sqle/Pack.py +0 -388
  406. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  407. teradataml/analytics/sqle/Sessionize.py +0 -390
  408. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  409. teradataml/analytics/sqle/Unpack.py +0 -503
  410. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  411. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  412. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  413. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  414. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  415. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  416. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  417. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  418. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  419. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  420. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  421. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  422. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  423. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  424. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  425. teradataml/catalog/model_cataloging.py +0 -980
  426. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  427. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  428. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  429. teradataml/table_operators/sandbox_container_util.py +0 -643
  430. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
  431. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
  432. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
@@ -17,7 +17,9 @@ import numpy as np
 import pandas as pd
 import random
 import time
+import threading
 from itertools import product
+from collections import defaultdict
 from teradataml import DataFrame, valib, TeradataMlException
 from teradataml.common.messages import Messages, MessageCodes
 from teradataml.hyperparameter_tuner.utils import _ProgressBar
@@ -171,7 +173,9 @@ class _BaseSearch:
         self.__progress_bar = None
         # '__model_err_records' holds error messages of failed model.
         self.__model_err_records = dict()
-
+        # '__parallel_stop_event' is used to stop threads in parallel execution.
+        self.__parallel_stop_event = None
+
         # Get the function name.
         self.__func_name = func._tdml_valib_name if "_VALIB" in str(func.__class__) \
             else func.__name__
@@ -227,6 +231,9 @@ class _BaseSearch:
             if self.__func_comparator[self.__evaluation_metric] \
             else self.__best_score_ <= self.__early_stop
 
+        # '_is_time_stoppable' function is to check whether HPT execution reached self.__timeout value.
+        self._is_time_stoppable = lambda : True if time.time() - self.__start_time >= self.__timeout else False
+
         # Special case comparator for "MPE" metrics.
         # When "curr_score" argument is 'None' then lambda function checks
         # for '_is_early_stoppable'. Otherwise, it checks for '_is_best_metrics'.
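
For orientation, the added '_is_time_stoppable' lambda is a plain elapsed-time check. A minimal standalone sketch of the same pattern (the names and the 30-second budget here are illustrative, not teradataml API):

    >>> import time
    >>> start_time, timeout = time.time(), 30
    >>> is_time_stoppable = lambda: time.time() - start_time >= timeout
    >>> is_time_stoppable()   # False until 30 seconds have elapsed
    False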
@@ -876,10 +883,6 @@ class _BaseSearch:
             self.__sampled_df_mapper[_data_id] = [{train_data_arg:_train_data},
                                                   {test_data_arg:_test_data}]
 
-        # Update model trainer function parameter grid.
-        self.__update_model_parameters()
-
-
     def __update_model_parameters(self):
         """
         DESCRIPTION:
@@ -924,16 +927,13 @@ class _BaseSearch:
             'data_id': 'DF_1'}
         ]
         """
-
         # Get data identifiers.
-        _model_ids = self.__sampled_df_mapper.keys()
-
+        _model_ids = self.__sampled_df_mapper.keys()
         # Update '_parameter_grid' with data identifiers by performing
         # cartesian product.
         self._parameter_grid = [{"param":param[0] , self.__DATA_ID:param[1]} for \
                                 param in product(self._parameter_grid, _model_ids)]
-
-
+
     def __validate_model_trainer_input_data_argument(self, data, is_optional_arg=True):
         """
         DESCRIPTION:
@@ -1006,6 +1006,7 @@ class _BaseSearch:
             stratify_column=None,
             sample_id_column=None,
             sample_seed=None,
+            max_time=None,
             **kwargs):
         """
         DESCRIPTION:
@@ -1146,6 +1147,12 @@ class _BaseSearch:
                 * Mandatory when "sample_seed" argument is present.
                 Types: str
 
+            max_time:
+                Optional Argument.
+                Specifies the maximum time for the completion of Hyperparameter tuning execution.
+                Default Value: None
+                Types: int or float
+
             kwargs:
                 Optional Argument.
                 Specifies the keyword arguments. Accepts additional arguments
@@ -1225,24 +1232,6 @@ class _BaseSearch:
         # Set the flag to notify fit method is called.
         self.__is_fit_called = True
 
-        if self.__is_trainable:
-            # "data" argument is a required argument for model trainer function
-            # when data argument is not passed with hyperparameters. On other side,
-            # "data" argument will be optional argument when data argument
-            # is passed with hyperparameters.
-            _is_optional_arg = self.__model_trainer_input_data is not None
-            # validate the model trainer function 'data' argument.
-            self.__validate_model_trainer_input_data_argument(data, _is_optional_arg)
-
-            if not data is None:
-                # '__model_trainer_input_data' is assigned with "data" argument,
-                # when user passes data argument in fit() method.
-                # Note: if user attempts to pass data argument in both "params"
-                #       argument as hyperparameters or "data" argument in fit()
-                #       method, then latest "data" argument value is considered
-                #       for model training.
-                self.__model_trainer_input_data = data
-
         # Validate "early_stop".
         arg_info_matrix = []
         arg_info_matrix.append(["early_stop", early_stop, True, (int, float)])
@@ -1251,24 +1240,29 @@ class _BaseSearch:
         arg_info_matrix.append(["wait", wait, True, (bool)])
         arg_info_matrix.append(["evaluation_metric", evaluation_metric, True,
                                 (str), True, list(self.__func_comparator)])
+        arg_info_matrix.append(["verbose", verbose, True, (int), True, [0,1,2]])
+        arg_info_matrix.append(["max_time", max_time, True, (int, float)])
 
         _Validators._validate_function_arguments(arg_info_matrix)
 
+        # set timeout value.
+        self.__timeout = max_time
+
+        self._setting_model_trainer_data(data)
+
         # Set the evaluation metrics.
         if evaluation_metric is not None:
             self.__evaluation_metric = evaluation_metric.upper()
         self.__early_stop = early_stop
-
         if self.__is_trainable and self.__is_evaluatable and self.__is_sqle_function:
+
             # When "evaluation_metric" is 'MPE' then use the spl comparators.
             if self.__evaluation_metric == "MPE":
                 self._is_best_metrics = self._is_early_stoppable = self._spl_abs_comparator
 
         if not isinstance(self.__model_trainer_input_data, dict):
-            # Label the data with unique IDs.
-            _labeled_data = self._add_data_label()
             # Sample all the labeled data for model training and testing.
-            self.__perform_train_test_sampling(_labeled_data, frac, stratify_column,
+            self.__perform_train_test_sampling(self._labeled_data, frac, stratify_column,
                                                sample_id_column, sample_seed)
 
         elif isinstance(self.__model_trainer_input_data, dict):
@@ -1276,6 +1270,8 @@ class _BaseSearch:
             self.__perform_train_test_sampling(self.__model_trainer_input_data, frac,
                                                stratify_column, sample_id_column,
                                                sample_seed)
+        # Update model trainer function parameter grid.
+        self.__update_model_parameters()
 
         self.__eval_params = kwargs if self.__is_evaluatable else None
 
@@ -1287,11 +1283,13 @@ class _BaseSearch:
             self.__sampled_df_mapper = self._add_data_label("data")
             # Update model trainer function parameter grid.
             self.__update_model_parameters()
-
+
         # Initialize logging.
         if verbose > 0:
             self.__progress_bar = _ProgressBar(jobs=len(self._parameter_grid), verbose=verbose)
 
         if not run_parallel:
+            # Setting start time of Sequential execution.
+            self.__start_time = time.time() if self.__timeout is not None else None
            # TODO: Factorize the code once parallel execution part is completed in ELE-6154 JIRA.
            # Execute all parameters from populated parameter grid for both trainable
            # and non trainable function.
@@ -1302,8 +1300,8 @@ class _BaseSearch:
                 # trainer function.
                 if self.__early_stop is not None and self.__is_evaluatable:
                     if self.__is_finite and self._is_early_stoppable():
-                        # Terminate HPT execution when the trained model attains the
-                        # specified "__early_stop" value.
+                        # Terminate HPT execution when the trained model attains the
+                        # given "early_stop" value.
                         break
                     elif not self.__is_finite:
                         # Raise error because non-finite values cannot be compared
@@ -1316,6 +1314,10 @@ class _BaseSearch:
                             " when '{metric}' metric results inconsistent value.".format(
                                 metric=self.__evaluation_metric))
                         raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
+                if self.__timeout is not None and self._is_time_stoppable():
+                    # Terminate HPT execution when the execution time exceeds the
+                    # given time limit.
+                    break
 
         else:
             # TODO: Added support for early_stop feature along with concurrency in ELE-6154 JIRA.
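
Because this check runs between parameter combinations, max_time behaves as a soft deadline: a model that is already training runs to completion before the loop exits, which is consistent with Example 7 near the end of this diff, where run times slightly exceed max_time=30. A minimal sketch of the sequential pattern, with a hypothetical train_one() standing in for one model execution:

    >>> import time
    >>> def run_grid(parameter_grid, train_one, timeout=None):
    ...     start_time = time.time() if timeout is not None else None
    ...     for param in parameter_grid:
    ...         train_one(param)  # an in-flight model may overshoot the deadline
    ...         if timeout is not None and time.time() - start_time >= timeout:
    ...             break  # stop before starting the next model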
@@ -1328,9 +1330,13 @@ class _BaseSearch:
                 _temp_params["model_param"] = param
                 _temp_params.update(kwargs)
                 async_exec_params.append(_temp_params)
-
+
+            # Initialize the stopping event
+            self.__parallel_stop_event = threading.Event()
             # let's initialize "_AsyncDBExecutor".
             self._async_executor = _AsyncDBExecutor(wait=wait)
+            # Setting start time of Parallel execution.
+            self.__start_time = time.time() if self.__timeout is not None else None
             # Trigger parallel thread execution.
             self._async_executor.submit(self._execute_fit, *async_exec_params)
 
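The parallel path cannot use a simple break, so the hunks below coordinate through the threading.Event created here: a worker that finds the event already set records a "SKIP" and returns, and a worker that finishes when the early-stop or timeout condition holds sets the event for the others. A standalone sketch of that set/check protocol (worker() and deadline are illustrative, not teradataml's actual executor):

    >>> import threading, time
    >>> stop_event = threading.Event()
    >>> def worker(param, deadline):
    ...     if stop_event.is_set():
    ...         return "SKIP"     # mirrors the SKIP metadata status below
    ...     time.sleep(0.1)       # stand-in for model training
    ...     if time.time() >= deadline:
    ...         stop_event.set()  # signal the remaining workers to skip
    ...     return "PASS"
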
@@ -1377,15 +1383,24 @@ class _BaseSearch:
         EXAMPLES:
             >>> self.__model_trainer_routine(param=param, iter=iter, **kwargs)
         """
+
         # Define model name used for model metadata.
         model_name = self._generate_model_name(iter)
         # Get the unique data identifier present in "model_param".
         _data_id = model_param[self.__DATA_ID]
-        # Retrieve the train and test data using data identifier.
-        _train_data, _test_data = self.__sampled_df_mapper[_data_id]
         # 'param' variable holds model training parameters and train dataframe.
         # Get the model training parameters.
         param = model_param["param"]
+
+        # Check the stop_event set or not
+        if self.__parallel_stop_event is not None and self.__parallel_stop_event.is_set():
+            # Update the model metadata for Skip execution.
+            self.__update_model_metadata(model_name, param, "SKIP", 0, _data_id)
+            return
+
+        # Retrieve the train and test data using data identifier.
+        _train_data, _test_data = self.__sampled_df_mapper[_data_id]
+
         # Update model training argument with train DataFrame.
         param.update(_train_data)
         # Update the test DataFrame for model evaluation.
@@ -1418,6 +1433,7 @@ class _BaseSearch:
                 # Default evaluation metric is set to "MAE" for Regression models.
                 if self.__evaluation_metric is None:
                     self.__evaluation_metric = "MAE"
+
             else:
                 # ClassificationEvaluator results are stored under "output_data"
                 # attribute. "output_data" dataframe 'column 1' contains metrics
@@ -1431,11 +1447,21 @@ class _BaseSearch:
                 # classification models.
                 if self.__evaluation_metric is None:
                     self.__evaluation_metric = "ACCURACY"
+
             # Update the model metadata for successful model training.
-
             self.__update_model_metadata(model_name, param, "PASS",
                                          training_time, _data_id,
                                          columns, eval_values)
+
+            # Check whether self.__parallel_stop_event is None or not
+            if self.__parallel_stop_event is not None:
+                # SET the self.__parallel_stop_event
+                # When trained model evaluation metric value exceeds self.__early_stop
+                # or When execution time exceeds self.__timeout
+                if (self.__early_stop is not None and self._is_early_stoppable())\
+                    or (self.__timeout is not None and self._is_time_stoppable()):
+                    self.__parallel_stop_event.set()
+
         except Exception as _err_msg:
             # Record error message with corresponding "model_name".
             self.__model_err_records[model_name] = str(_err_msg)
@@ -1513,7 +1539,11 @@ class _BaseSearch:
         else:
             # Initialize param for non-model trainer functions.
             param = model_param
-
+            # Check the stop_event set or not
+            if self.__parallel_stop_event is not None and self.__parallel_stop_event.is_set():
+                # Update the model metadata for Skip execution.
+                self.__update_model_metadata(model_name, param, "SKIP", 0, _data_id)
+                return
         try:
             # Record starting time of model training.
             start_time = time.perf_counter()
@@ -1541,6 +1571,13 @@ class _BaseSearch:
             # Update the model metadata for failed execution.
             self.__update_model_metadata(model_name, param, "FAIL", training_time, _data_id)
             pass
+
+        if self.__parallel_stop_event is not None:
+            # SET the self.__parallel_stop_event
+            # When execution time exceeds self.__timeout
+            if self.__timeout is not None and self._is_time_stoppable():
+                self.__parallel_stop_event.set()
+
 
 
     def __update_model_metadata(self, model_name,
@@ -1573,6 +1610,7 @@ class _BaseSearch:
             Permitted Values:
                 * PASS: Function result present in the vantage.
                 * FAIL: Function execution failed for the chosen parameters.
+                * SKIP: Function execution skipped for the chosen parameters.
             Types: str
 
         data_id:
1622
1660
  model_metadata = {"MODEL_ID" : model_name,
1623
1661
  "PARAMETERS" : param,
1624
1662
  "STATUS" : status}
1625
-
1626
1663
  if self.__is_trainable:
1627
1664
  # Update "data_id" for model trainer functions.
1628
1665
  model_metadata[self.__DATA_ID.upper()] = data_id
@@ -1664,7 +1701,7 @@ class _BaseSearch:
             # training best model.
             self.__best_data_id = data_id
 
-        if not self.__progress_bar is None:
+        if not self.__progress_bar is None and status != 'SKIP':
             # Update progress bar when logging is required.
             self.__progress_bar.update(msg=_msg)
         # Update "__model_eval_records" with the formatted metadata.
@@ -2057,6 +2094,160 @@ class _BaseSearch:
 
         # Return list of dictionary containing all possible combinations.
         return [dict(param) for param in product(*param_pairs)]
+
+    def _data_mapping(self):
+        """
+        DESCRIPTION:
+            Internal function to create a Cartesian product of data mapped with input columns
+            and parameter grid.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            None
+        """
+        # Get the input columns from the params.
+        input_columns = self.__params.pop("input_columns")
+        # Create a list of dictionaries with data_id and input_columns
+        data_mapping_list = []
+        # Iterate over the labeled data and create a list of dictionaries
+        for data_ids, data in self._labeled_data.items():
+            # Check if all input columns are present in the data
+            for input_cols in input_columns:
+                if all(col in data.columns for col in input_cols):
+                    data_mapping_list.append({'data_id': data_ids,
+                                              'input_columns': input_cols})
+
+        self._parameter_grid = self.__populate_parameter_grid()
+
+        cartesian_product = product(self._parameter_grid, data_mapping_list)
+
+        result_list = []
+
+        # Iterate over the Cartesian product and construct the desired dictionaries
+        for params, data_mapping in cartesian_product:
+            result_dict = {
+                'param': {**params, 'input_columns': data_mapping['input_columns']},
+                self.__DATA_ID: data_mapping['data_id']
+            }
+            result_list.append(result_dict)
+
+        self._parameter_grid = result_list
+
+
+    def _setting_model_trainer_data(self,
+                                    data=None):
+        """
+        DESCRIPTION:
+            Internal function to set the model trainer input data for model
+            training.
+
+        PARAMETERS:
+            data:
+                Optional Argument.
+                Specifies the input data used for model training.
+                Note:
+                    * "data" argument is a required argument for model trainer
+                      function when data argument is not passed with hyperparameters.
+                    * When data argument is passed with hyperparameters then
+                      "data" argument is optional.
+                Types: teradataml DataFrame
+
+        RETURNS:
+            None
+
+        Example:
+            >>> print(self.__model_trainer_input_data)
+            (    id  admitted       gpa  stats  programming  masters
+             0   19         0  0.051643    0.0          0.0      1.0
+             1    6         1  0.765258    0.5          0.0      1.0
+             2   15         1  1.000000    0.0          0.0      1.0
+             3   32         0  0.746479    0.0          0.5      1.0
+             4   12         1  0.835681    1.0          1.0      0.0
+             5   40         0  0.976526    1.0          0.5      1.0
+             6    7         1  0.215962    1.0          1.0      1.0
+             7   36         0  0.530516    0.0          1.0      0.0
+             8   28         1  0.967136    0.0          0.0      0.0
+             9   17         1  0.920188    0.0          0.0      0.0,
+                 id  admitted       gpa  stats  programming  masters
+             0    4         1  0.765258    0.5          1.0      1.0
+             1    6         1  0.765258    0.5          0.0      1.0
+             2    7         1  0.215962    1.0          1.0      1.0
+             3    8         1  0.812207    0.5          0.0      0.0
+             4   10         1  0.863850    0.0          0.0      0.0
+             5   11         1  0.591549    0.0          0.0      0.0
+             6    9         1  0.915493    0.0          0.0      0.0
+             7    5         0  0.737089    1.0          1.0      0.0
+             8    3         1  0.859155    1.0          0.5      0.0
+             9    2         0  0.887324    0.5          0.5      1.0,
+                 id  admitted       gpa  stats  programming  masters
+             0   23         1  0.807512    0.0          1.0      1.0
+             1   25         1  0.981221    0.0          0.0      0.0
+             2   26         1  0.798122    0.0          0.0      1.0
+             3   27         0  0.981221    0.0          0.0      1.0
+             4   29         0  1.000000    1.0          0.5      1.0
+             5   30         0  0.901408    0.0          1.0      1.0
+             6   28         1  0.967136    0.0          0.0      0.0
+             7   24         1  0.000000    0.0          1.0      0.0
+             8   22         0  0.746479    1.0          0.5      1.0
+             9   21         1  0.938967    1.0          0.5      0.0)
+
+            >>> print(self._labeled_data)
+            {'DF_0':     id  admitted       gpa  stats  programming  masters
+             0   26         1  0.798122    0.0          0.0      1.0
+             1   40         0  0.976526    1.0          0.5      1.0
+             2    7         1  0.215962    1.0          1.0      1.0
+             3   19         0  0.051643    0.0          0.0      1.0
+             4   15         1  1.000000    0.0          0.0      1.0
+             5   32         0  0.746479    0.0          0.5      1.0
+             6   38         1  0.366197    0.0          0.5      1.0
+             7   12         1  0.835681    1.0          1.0      0.0
+             8    6         1  0.765258    0.5          0.0      1.0
+             9   36         0  0.530516    0.0          1.0      0.0,
+             'DF_1':     id  admitted       gpa  stats  programming  masters
+             0    4         1  0.765258    0.5          1.0      1.0
+             1    6         1  0.765258    0.5          0.0      1.0
+             2    7         1  0.215962    1.0          1.0      1.0
+             3    8         1  0.812207    0.5          0.0      0.0
+             4   10         1  0.863850    0.0          0.0      0.0
+             5   11         1  0.591549    0.0          0.0      0.0
+             6    9         1  0.915493    0.0          0.0      0.0
+             7    5         0  0.737089    1.0          1.0      0.0
+             8    3         1  0.859155    1.0          0.5      0.0
+             9    2         0  0.887324    0.5          0.5      1.0,
+             'DF_2':     id  admitted       gpa  stats  programming  masters
+             0   23         1  0.807512    0.0          1.0      1.0
+             1   25         1  0.981221    0.0          0.0      0.0
+             2   26         1  0.798122    0.0          0.0      1.0
+             3   27         0  0.981221    0.0          0.0      1.0
+             4   29         0  1.000000    1.0          0.5      1.0
+             5   30         0  0.901408    0.0          1.0      1.0
+             6   28         1  0.967136    0.0          0.0      0.0
+             7   24         1  0.000000    0.0          1.0      0.0
+             8   22         0  0.746479    1.0          0.5      1.0
+             9   21         1  0.938967    1.0          0.5      0.0}
+        """
+        if self.__is_trainable:
+            # "data" argument is a required argument for model trainer function
+            # when data argument is not passed with hyperparameters. On other side,
+            # "data" argument will be optional argument when data argument
+            # is passed with hyperparameters.
+            _is_optional_arg = self.__model_trainer_input_data is not None
+            # validate the model trainer function 'data' argument.
+            self.__validate_model_trainer_input_data_argument(data, _is_optional_arg)
+
+            if not data is None:
+                # '__model_trainer_input_data' is assigned with "data" argument,
+                # when user passes data argument in fit() method.
+                # Note: if user attempts to pass data argument in both "params"
+                #       argument as hyperparameters or "data" argument in fit()
+                #       method, then latest "data" argument value is considered
+                #       for model training.
+                self.__model_trainer_input_data = data
+
+        if self.__is_trainable and self.__is_evaluatable and self.__is_sqle_function:
+            self._labeled_data = self._add_data_label()
 
 
 class GridSearch(_BaseSearch):
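
The new _data_mapping above pairs every parameter combination with every compatible labeled dataset via itertools.product. A standalone sketch of the resulting grid shape (the parameter names and data labels here are illustrative):

    >>> from itertools import product
    >>> parameter_grid = [{"max_depth": 5}, {"max_depth": 10}]
    >>> data_mapping_list = [{"data_id": "DF_0", "input_columns": ["gpa", "stats"]},
    ...                      {"data_id": "DF_1", "input_columns": ["gpa"]}]
    >>> grid = [{"param": {**params, "input_columns": m["input_columns"]},
    ...          "data_id": m["data_id"]}
    ...         for params, m in product(parameter_grid, data_mapping_list)]
    >>> len(grid)  # 2 parameter combinations x 2 data mappings
    4
    >>> grid[0]
    {'param': {'max_depth': 5, 'input_columns': ['gpa', 'stats']}, 'data_id': 'DF_0'}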
@@ -2659,9 +2850,8 @@ class GridSearch(_BaseSearch):
 
         """
 
-        self.__params = params
+        self.__params = params.copy()
         super().__init__(func=func, params=self.__params)
-
         # Populate parameter grid from provided parameter space.
         self.__populate_params_grid()
 
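Switching to params.copy() presumably guards against mutation leaking back to the caller: _data_mapping pops "input_columns" out of the stored dictionary, and without a copy that pop would also empty the user's own params dict. A plain-Python illustration of the aliasing hazard:

    >>> original = {"max_depth": (5, 10, 15), "input_columns": ["gpa"]}
    >>> stored = original  # alias: both names refer to one dict
    >>> stored.pop("input_columns")
    ['gpa']
    >>> "input_columns" in original  # the caller's dict was mutated too
    False
    >>> original = {"max_depth": (5, 10, 15), "input_columns": ["gpa"]}
    >>> stored = original.copy()  # a shallow copy shields the caller
    >>> stored.pop("input_columns")
    ['gpa']
    >>> "input_columns" in original
    True
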
@@ -2688,87 +2878,381 @@ class GridSearch(_BaseSearch):
2688
2878
  # Since GridSearch works on all parameter combinations. Set
2689
2879
  # all the parameter combinations to the parameter grid.
2690
2880
  self._parameter_grid = self._BaseSearch__populate_parameter_grid()
2881
+
2691
2882
 
2692
-
2693
- class RandomSearch(_BaseSearch):
2694
- def __init__(self, func, params, n_iter=10):
2883
+ def fit(self,
2884
+ data=None,
2885
+ evaluation_metric=None,
2886
+ early_stop=None,
2887
+ frac=0.8,
2888
+ run_parallel=True,
2889
+ wait=True,
2890
+ verbose=0,
2891
+ stratify_column=None,
2892
+ sample_id_column=None,
2893
+ sample_seed=None,
2894
+ max_time=None,
2895
+ **kwargs):
2695
2896
  """
2696
2897
  DESCRIPTION:
2697
- RandomSearch algorithm performs random sampling on hyperparameter
2698
- space to identify optimal hyperparameters. It works for
2699
- teradataml analytic functions from SQLE, BYOM, VAL and UAF features.
2700
- teradataml RandomSearch allows user to perform hyperparameter tuning for
2701
- all model trainer and non-model trainer functions.
2702
- When used for model trainer functions:
2703
- * Based on evaluation metrics search determines best model.
2704
- * All methods and properties can be used.
2705
- When used for non-model trainer functions:
2706
- * Only fit() method is supported.
2707
- * User can choose the best output as they see fit to use this.
2708
-
2709
- teradataml RandomSearch also allows user to use input data as the
2710
- hyperparameter. This option can be suitable when the user wants to
2711
- identify the best models for a set of input data. When user passes
2712
- set of data as hyperparameter for model trainer function, the search
2713
- determines the best data along with the best model based on the
2714
- evaluation metrics.
2898
+ Function to perform hyperparameter tuning using GridSearch algorithm.
2899
+ Notes:
2900
+ * In the Model trainer function, the best parameters are
2901
+ selected based on training results.
2902
+ * In the Non model trainer function, First execution parameter
2903
+ set is selected as the best parameters.
2715
2904
 
2716
2905
  PARAMETERS:
2717
- func:
2718
- Required Argument.
2719
- Specifies a teradataml analytic function from SQLE, VAL, and UAF.
2720
- Types:
2721
- teradataml Analytic Functions
2722
- * Advanced analytic functions
2723
- * UAF
2724
- * VAL
2725
- Refer to display_analytic_functions() function for list of functions.
2906
+ data:
2907
+ Optional Argument.
2908
+ Specifies the input teradataml DataFrame for model trainer function.
2909
+ Notes:
2910
+ * DataFrame need not to be passed in fit() methods, when "data" is
2911
+ passed as a model hyperparameters ("params").
2912
+ * "data" is a required argument for model trainer functions.
2913
+ * "data" is ignored for non-model trainer functions.
2914
+ * "data" can be contain single DataFrame or multiple DataFrame.
2915
+ * One can pass multiple dataframes to "data". Hyperparameter
2916
+ tuning is performed on all the dataframes for every model
2917
+ parameter.
2918
+ * "data" can be either a dictionary OR a tuple OR a dataframe.
2919
+ * If it is a dictionary then Key represents the label for
2920
+ dataframe and Value represents the dataframe.
2921
+ * If it is a tuple then teradataml converts it to dictionary
2922
+ by generating the labels internally.
2923
+ * If it is a dataframe then teradataml label it as "DF_0".
2924
+ Types: teradataml DataFrame, dictionary, tuples
2726
2925
 
2727
- params:
2728
- Required Argument.
2729
- Specifies the parameter(s) of a teradataml analytic function.
2730
- The parameter(s) must be in dictionary. keys refers to the
2731
- argument names and values refers to argument values for corresponding
2732
- arguments.
2926
+ evaluation_metric:
2927
+ Optional Argument.
2928
+ Specifies the evaluation metrics to considered for model
2929
+ evaluation.
2733
2930
  Notes:
2734
- * One can specify the argument value in a tuple to run HPT
2735
- with different arguments.
2736
- * Model trainer function arguments "id_column", "input_columns",
2737
- and "target_columns" must be passed in fit() method.
2738
- * All required arguments of non-model trainer function must be
2739
- passed while RandomSearch object creation.
2740
- Types: dict
2741
-
2742
- n_iter:
2931
+ * evaluation_metric applicable for model trainer functions.
2932
+ * Best model is not selected when evaluation returns
2933
+ non-finite values.
2934
+ Permitted Values:
2935
+ * Classification: Accuracy, Micro-Precision, Micro-Recall,
2936
+ Micro-F1, Macro-Precision, Macro-Recall,
2937
+ Macro-F1, Weighted-Precision,
2938
+ Weighted-Recall,
2939
+ Weighted-F1.
2940
+ * Regression: MAE, MSE, MSLE, MAPE, MPE, RMSE, RMSLE, ME,
2941
+ R2, EV, MPD, MGD
2942
+
2943
+ Default Value:
2944
+ * Classification: Accuracy
2945
+ * Regression: MAE
2946
+ Types: str
2947
+
2948
+ early_stop:
2743
2949
  Optional Argument.
2744
- Specifies the number of iterations random search need to be performed.
2950
+ Specifies the early stop mechanism value for model trainer
2951
+ functions. Hyperparameter tuning ends model training when
2952
+ the training model evaluation metric attains "early_stop" value.
2745
2953
  Note:
2746
- * n_iter must be less than the size of parameter populations.
2747
- Default Value: 10
2748
- Types: int
2749
-
2750
- RETURNS:
2751
- None
2954
+ * Early stopping supports only when evaluation returns
2955
+ finite value.
2956
+ Types: int or float
2752
2957
 
2753
- RAISES:
2754
- TeradataMlException, TypeError, ValueError
2755
-
2756
- EXAMPLES:
2757
- >>> # Example 1: Model trainer function. Performing hyperparameter-tuning
2758
- >>> # on SVM model trainer function using random search algorithm.
2759
-
2760
- >>> # Load the example data.
2761
- >>> load_example_data("teradataml", ["cal_housing_ex_raw"])
2762
-
2763
- >>> # Create teradataml DataFrame objects.
2764
- >>> data_input = DataFrame.from_table("cal_housing_ex_raw")
2958
+ frac:
2959
+ Optional Argument.
2960
+ Specifies the split percentage of rows to be sampled for training
2961
+ and testing dataset. "frac" argument value must range between (0, 1).
2962
+ Notes:
2963
+ * This "frac" argument is not supported for non-model trainer
2964
+ function.
2965
+ * The "frac" value is considered as train split percentage and
2966
+ The remaining percentage is taken into account for test splitting.
2967
+ Default Value: 0.8
2968
+ Types: float
2765
2969
 
2766
- >>> # Scale "target_columns" with respect to 'STD' value of the column.
2767
- >>> fit_obj = ScaleFit(data=data_input,
2768
- target_columns=['MedInc', 'HouseAge', 'AveRooms',
2769
- 'AveBedrms', 'Population', 'AveOccup',
2770
- 'Latitude', 'Longitude'],
2771
- scale_method="STD")
2970
+ run_parallel:
2971
+ Optional Argument.
2972
+ Specifies the parallel execution functionality of hyperparameter
2973
+ tuning. When "run_parallel" set to true, model functions are
2974
+ executed concurrently. Otherwise, model functions are executed
2975
+ sequentially.
2976
+ Default Value: True
2977
+ Types: bool
2978
+
2979
+ wait:
2980
+ Optional Argument.
2981
+ Specifies whether to wait for the completion of execution
2982
+ of hyperparameter tuning or not. When set to False, hyperparameter
2983
+ tuning is executed in the background and user can use "is_running()"
2984
+ method to check the status. Otherwise it waits until the execution
2985
+ is complete to return the control back to user.
2986
+ Default Value: True
2987
+ Type: bool
2988
+
2989
+ verbose:
2990
+ Optional Argument.
2991
+ Specifies whether to log the model training information and display
2992
+ the logs. When it is set to 1, progress bar alone logged in the
2993
+ console. When it is set to 2, along with progress bar, execution
2994
+ steps and execution time is logged in the console. When it is set
2995
+ to 0, nothing is logged in the console.
2996
+ Note:
2997
+ * verbose is not significant when "wait" is 'False'.
2998
+ Default Value: 0
2999
+ Type: bool
3000
+
3001
+ sample_seed:
3002
+ Optional Argument.
3003
+ Specifies the seed value that controls the shuffling applied
3004
+ to the data before applying the Train-Test split. Pass an int for
3005
+ reproducible output across multiple function calls.
3006
+ Notes:
3007
+ * When the argument is not specified, different
3008
+ runs of the query generate different outputs.
3009
+ * It must be in the range [0, 2147483647]
3010
+ * Seed is supported for stratify column.
3011
+ Types: int
3012
+
3013
+ stratify_column:
3014
+ Optional Argument.
3015
+ Specifies column name that contains the labels indicating
3016
+ which data needs to be stratified for TrainTest split.
3017
+ Notes:
3018
+ * seed is supported for stratify column.
3019
+ Types: str
3020
+
3021
+ sample_id_column:
3022
+ Optional Argument.
3023
+ Specifies the input data column name that has the
3024
+ unique identifier for each row in the input.
3025
+ Note:
3026
+ * Mandatory when "sample_seed" argument is present.
3027
+ Types: str
3028
+
3029
+ max_time:
3030
+ Optional Argument.
3031
+ Specifies the maximum time for the completion of Hyperparameter tuning execution.
3032
+ Default Value: None
3033
+ Types: int or float
3034
+
3035
+ kwargs:
3036
+ Optional Argument.
3037
+ Specifies the keyword arguments. Accepts additional arguments
3038
+ required for the teradataml analytic function.
3039
+
3040
+ RETURNS:
3041
+ None
3042
+
3043
+ RAISES:
3044
+ TeradataMlException, TypeError, ValueError
3045
+
3046
+ EXAMPLES:
+            >>> # Create an instance of the GridSearch algorithm called "optimizer_obj".
+            >>> optimizer_obj = GridSearch(func=SVM, params=params)
+
+            >>> eval_params = {"id_column": "id",
+                               "accumulate": "MedHouseVal"}
+            >>> # Example 1: Passing a single DataFrame for the model trainer function.
+            >>> optimizer_obj.fit(data=train_df,
+                                  evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 2: Passing multiple datasets as a tuple of DataFrames
+            >>> # for the model trainer function.
+            >>> optimizer_obj.fit(data=(train_df_1, train_df_2),
+                                  evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 3: Passing multiple datasets as a dictionary of DataFrames
+            >>> # for the model trainer function.
+            >>> optimizer_obj.fit(data={"Data-1":train_df_1, "Data-2":train_df_2},
+                                  evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 4: No "data" argument passed in the fit() method for the
+            >>> # model trainer function.
+            >>> # Note: The "data" argument must be passed as a model hyperparameter
+            >>> # ("params") while creating the HPT object.
+
+            >>> # Define the parameter space for model training with the "data" argument.
+            >>> params = {"data":(df1, df2),
+                          "input_columns":['MedInc', 'HouseAge', 'AveRooms',
+                                           'AveBedrms', 'Population', 'AveOccup',
+                                           'Latitude', 'Longitude'],
+                          "response_column":"MedHouseVal",
+                          "model_type":"regression",
+                          "batch_size":(11, 50, 75),
+                          "iter_max":(100, 301),
+                          "intercept":False,
+                          "learning_rate":"INVTIME",
+                          "nesterov_optimization":True,
+                          "local_sgd_iterations":1}
+
+            >>> # Create "optimizer_obj" using the GridSearch algorithm and call the
+            >>> # fit() method without any "data" argument for the model trainer function.
+            >>> optimizer_obj.fit(evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 5: Do not pass the "data" argument in the fit() method for a
+            >>> # non-model trainer function.
+            >>> # Note: The "data" argument must be passed as a model hyperparameter
+            >>> # ("params") while creating the HPT object.
+            >>> optimizer_obj.fit()
+
+            >>> # Example 6: Passing "verbose" argument value '1' in the fit() method
+            >>> # to display the model log.
+            >>> optimizer_obj.fit(data=train_df, evaluation_metric="R2",
+                                  verbose=1, **eval_params)
+            completed: |████████████████████████████████████████████████████████████| 100% - 6/6
+
+            >>> # Example 7: The "max_time" argument is passed in the fit() method.
+            >>> # Model training parameters.
+            >>> model_params = {"input_columns":['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
+            ...                 "response_column":'species',
+            ...                 "max_depth":(5,10,15),
+            ...                 "lambda1":(1000.0,0.001),
+            ...                 "model_type":"Classification",
+            ...                 "seed":32,
+            ...                 "shrinkage_factor":0.1,
+            ...                 "iter_num":(5, 50)}
+            >>>
+            >>> eval_params = {"id_column": "id",
+            ...                "accumulate":"species",
+            ...                "model_type":'Classification',
+            ...                "object_order_column":['task_index', 'tree_num', 'iter', 'class_num', 'tree_order']
+            ...                }
+            >>>
+            >>> # Import the model trainer function and optimizer.
+            >>> from teradataml import XGBoost, GridSearch
+            >>>
+            >>> # Initialize the GridSearch optimizer with the model trainer
+            >>> # function and the parameter space required for model training.
+            >>> gs_obj = GridSearch(func=XGBoost, params=model_params)
+            >>>
+            >>> # fit() method with the "max_time" argument (in seconds) for the model trainer function.
+            >>> gs_obj.fit(data=data, max_time=30, verbose=2, **eval_params)
+            Model_id:XGBOOST_2 - Run time:33.277s - Status:PASS - ACCURACY:0.933
+            Model_id:XGBOOST_3 - Run time:33.276s - Status:PASS - ACCURACY:0.933
+            Model_id:XGBOOST_0 - Run time:33.279s - Status:PASS - ACCURACY:0.967
+            Model_id:XGBOOST_1 - Run time:33.278s - Status:PASS - ACCURACY:0.933
+            Computing: |⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾| 33% - 4/12
+            >>>
+            >>> # Status is 'SKIP' for the models that did not complete within "max_time".
+            >>> gs_obj.models
+                  MODEL_ID DATA_ID                                         PARAMETERS STATUS  ACCURACY
+            0    XGBOOST_2    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.933333
+            1    XGBOOST_4    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            2    XGBOOST_5    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            3    XGBOOST_6    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            4    XGBOOST_7    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            5    XGBOOST_8    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            6    XGBOOST_9    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            7   XGBOOST_10    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            8   XGBOOST_11    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            9    XGBOOST_3    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.933333
+            10   XGBOOST_0    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.966667
+            11   XGBOOST_1    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.933333
+        """
+
+        # Set the flag to discard invalid column parameters.
+        self.discard_invalid_column_params = kwargs.get("discard_invalid_column_params", False)
+
+        if self.discard_invalid_column_params:
+            # Set the model trainer input data.
+            super()._setting_model_trainer_data(data)
+            # Map the data for the model trainer function.
+            super()._data_mapping()
+            # Replace the hooks with no-op lambdas.
+            self._setting_model_trainer_data = lambda data: None
+            self._BaseSearch__update_model_parameters = lambda: None
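+            # Note: with the no-op lambdas in place, the base class fit()
+            # below does not repeat the data setup and parameter update
+            # already performed here.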
+
+        # Call the _BaseSearch class fit() method.
+        super().fit(data, evaluation_metric,
+                    early_stop, frac, run_parallel,
+                    wait, verbose, stratify_column,
+                    sample_id_column, sample_seed,
+                    max_time, **kwargs)
+
+
+class RandomSearch(_BaseSearch):
+    def __init__(self, func, params, n_iter=10, **kwargs):
+        """
+        DESCRIPTION:
+            RandomSearch algorithm performs random sampling on the
+            hyperparameter space to identify optimal hyperparameters. It
+            works with teradataml analytic functions from the SQLE, BYOM,
+            VAL, and UAF features.
+            teradataml RandomSearch allows the user to perform hyperparameter
+            tuning for all model trainer and non-model trainer functions.
+            When used for model trainer functions:
+                * Based on evaluation metrics, the search determines the
+                  best model.
+                * All methods and properties can be used.
+            When used for non-model trainer functions:
+                * Only the fit() method is supported.
+                * The user can choose the best output as they see fit.
+
+            teradataml RandomSearch also allows the user to use input data
+            as a hyperparameter. This option is suitable when the user wants
+            to identify the best models for a set of input data. When the
+            user passes a set of data as a hyperparameter for a model
+            trainer function, the search determines the best data along with
+            the best model based on the evaluation metrics.
+
+        PARAMETERS:
+            func:
+                Required Argument.
+                Specifies a teradataml analytic function from SQLE, VAL, and UAF.
+                Types:
+                    teradataml Analytic Functions
+                        * Advanced analytic functions
+                        * UAF
+                        * VAL
+                Refer to the display_analytic_functions() function for the
+                list of functions.
+
+            params:
+                Required Argument.
+                Specifies the parameter(s) of a teradataml analytic function.
+                The parameter(s) must be in a dictionary; keys refer to the
+                argument names and values refer to the argument values for
+                the corresponding arguments.
+                Notes:
+                    * One can specify an argument value in a tuple to run HPT
+                      with different arguments.
+                    * Model trainer function arguments "id_column",
+                      "input_columns", and "target_columns" must be passed in
+                      the fit() method.
+                    * All required arguments of a non-model trainer function
+                      must be passed during RandomSearch object creation.
+                Types: dict
+
+            n_iter:
+                Optional Argument.
+                Specifies the number of random search iterations to be
+                performed.
+                Note:
+                    * "n_iter" must not exceed the size of the parameter
+                      population.
+                Default Value: 10
+                Types: int
+
+        RETURNS:
+            None
+
+        RAISES:
+            TeradataMlException, TypeError, ValueError
+
+        EXAMPLES:
+            >>> # Example 1: Model trainer function. Perform hyperparameter
+            >>> # tuning on the SVM model trainer function using the random
+            >>> # search algorithm.
+
+            >>> # Load the example data.
+            >>> load_example_data("teradataml", ["cal_housing_ex_raw"])
+
+            >>> # Create teradataml DataFrame objects.
+            >>> data_input = DataFrame.from_table("cal_housing_ex_raw")
+
+            >>> # Scale "target_columns" with respect to the 'STD' value of the column.
+            >>> fit_obj = ScaleFit(data=data_input,
+                                   target_columns=['MedInc', 'HouseAge', 'AveRooms',
+                                                   'AveBedrms', 'Population', 'AveOccup',
+                                                   'Latitude', 'Longitude'],
+                                   scale_method="STD")
 
             >>> # Transform the data.
             >>> transform_obj = ScaleTransform(data=data_input,
@@ -2953,7 +3437,7 @@ class RandomSearch(_BaseSearch):
 
         """
 
-        self.__params = params
+        self.__params = params.copy()
         super().__init__(func=func, params=self.__params)
         # Validate argument 'n_iter'
         awu_matrix = []
@@ -2964,10 +3448,9 @@ class RandomSearch(_BaseSearch):
         # Validates the range of n_iter should be greater than or equal to 1 and
         # less than or equal to parameter space.
         _Validators._validate_argument_range(n_iter, "n_iter", 1, len(parameter_space), True, True)
+        self._n_iter = n_iter
 
-        self.__populate_params_grid(n_iter, parameter_space)
-
-    def __populate_params_grid(self, n_iter, parameter_space):
+    def __populate_params_grid(self):
         """
         DESCRIPTION:
             Populate parameter grid based on the search algorithm. In random search,
@@ -2988,6 +3471,313 @@ class RandomSearch(_BaseSearch):
         EXAMPLES:
             >>> self.__populate_params_grid()
         """
-
         # Populate the parameter space with random and non-repetitive value
-        self._parameter_grid = random.sample(parameter_space, n_iter)
+        if self.discard_invalid_column_params:
+            # Define an empty data_grouped_dict to group the parameters based on data_id.
+            data_grouped_dict = defaultdict(list)
+            for parameter in self._parameter_grid:
+                # Extract the data_id from the parameter.
+                data_id = parameter['data_id']
+                # Group the parameters based on data_id.
+                data_grouped_dict[data_id].append(parameter)
+            # Convert the grouped dictionary to a list of groups.
+            data_grouped_dict = list(data_grouped_dict.values())
+            parameter_grid = []
+            for group in data_grouped_dict:
+                # Randomly select n_iter parameters from each group.
+                tmp = random.sample(group, self._n_iter)
+                parameter_grid.extend(tmp)
+
+            # Set the parameter grid.
+            self._parameter_grid = parameter_grid
+        else:
+            self._parameter_grid = random.sample(self.get_parameter_grid(), self._n_iter)
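+        # Note: with "discard_invalid_column_params" set, sampling is applied
+        # once per "data_id" group above, so the grid keeps n_iter candidate
+        # parameter sets for each input dataset rather than n_iter overall.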
+
+    def fit(self,
+            data=None,
+            evaluation_metric=None,
+            early_stop=None,
+            frac=0.8,
+            run_parallel=True,
+            wait=True,
+            verbose=0,
+            stratify_column=None,
+            sample_id_column=None,
+            sample_seed=None,
+            max_time=None,
+            **kwargs):
+        """
+        DESCRIPTION:
+            Function to perform hyperparameter tuning using the RandomSearch
+            algorithm.
+            Notes:
+                * For model trainer functions, the best parameters are
+                  selected based on training results.
+                * For non-model trainer functions, the first executed
+                  parameter set is selected as the best parameters.
+
+        PARAMETERS:
+            data:
+                Optional Argument.
+                Specifies the input teradataml DataFrame for the model
+                trainer function.
+                Notes:
+                    * A DataFrame need not be passed in the fit() method
+                      when "data" is passed as a model hyperparameter
+                      ("params").
+                    * "data" is a required argument for model trainer
+                      functions.
+                    * "data" is ignored for non-model trainer functions.
+                    * "data" can contain a single DataFrame or multiple
+                      DataFrames.
+                    * One can pass multiple DataFrames to "data".
+                      Hyperparameter tuning is performed on all the
+                      DataFrames for every model parameter.
+                    * "data" can be either a dictionary, a tuple, or a
+                      DataFrame.
+                        * If it is a dictionary, then the key represents
+                          the label for the DataFrame and the value
+                          represents the DataFrame.
+                        * If it is a tuple, then teradataml converts it to
+                          a dictionary by generating the labels internally.
+                        * If it is a DataFrame, then teradataml labels it
+                          as "DF_0".
+                Types: teradataml DataFrame, dictionary, tuple
+
+            evaluation_metric:
+                Optional Argument.
+                Specifies the evaluation metric to be considered for model
+                evaluation.
+                Notes:
+                    * "evaluation_metric" is applicable only to model
+                      trainer functions.
+                    * The best model is not selected when evaluation returns
+                      non-finite values.
+                Permitted Values:
+                    * Classification: Accuracy, Micro-Precision, Micro-Recall,
+                                      Micro-F1, Macro-Precision, Macro-Recall,
+                                      Macro-F1, Weighted-Precision,
+                                      Weighted-Recall, Weighted-F1
+                    * Regression: MAE, MSE, MSLE, MAPE, MPE, RMSE, RMSLE, ME,
+                                  R2, EV, MPD, MGD
+
+                Default Value:
+                    * Classification: Accuracy
+                    * Regression: MAE
+                Types: str
+
+            early_stop:
+                Optional Argument.
+                Specifies the early stop value for model trainer functions.
+                Hyperparameter tuning ends model training when the training
+                model's evaluation metric attains the "early_stop" value.
+                Note:
+                    * Early stopping is supported only when evaluation
+                      returns a finite value.
+                Types: int or float
+
+            frac:
+                Optional Argument.
+                Specifies the split percentage of rows to be sampled for the
+                training and testing datasets. The "frac" argument value
+                must range between (0, 1).
+                Notes:
+                    * The "frac" argument is not supported for non-model
+                      trainer functions.
+                    * The "frac" value is considered as the train split
+                      percentage; the remaining percentage is used for the
+                      test split.
+                Default Value: 0.8
+                Types: float
+
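+                A minimal sketch of a custom split (an assumption for
+                illustration; 0.75 keeps 75% of the rows for training and
+                25% for testing, with "rs_obj", "train_df" and "eval_params"
+                as in the examples below):
+                >>> rs_obj.fit(data=train_df, frac=0.75, **eval_params)
+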
+            run_parallel:
+                Optional Argument.
+                Specifies whether to run hyperparameter tuning in parallel.
+                When "run_parallel" is set to True, model functions are
+                executed concurrently. Otherwise, model functions are
+                executed sequentially.
+                Default Value: True
+                Types: bool
+
+            wait:
+                Optional Argument.
+                Specifies whether to wait for the completion of the
+                hyperparameter tuning execution. When set to False,
+                hyperparameter tuning is executed in the background, and
+                the "is_running()" method can be used to check the status.
+                Otherwise, it waits until the execution is complete before
+                returning control to the user.
+                Default Value: True
+                Types: bool
+
+            verbose:
+                Optional Argument.
+                Specifies whether to log the model training information and
+                display the logs. When set to 1, only the progress bar is
+                logged in the console. When set to 2, the execution steps
+                and execution time are logged in the console along with the
+                progress bar. When set to 0, nothing is logged in the
+                console.
+                Note:
+                    * "verbose" is not significant when "wait" is 'False'.
+                Default Value: 0
+                Types: int
+
+            sample_seed:
+                Optional Argument.
+                Specifies the seed value that controls the shuffling applied
+                to the data before the Train-Test split. Pass an int for
+                reproducible output across multiple function calls.
+                Notes:
+                    * When the argument is not specified, different runs of
+                      the query generate different outputs.
+                    * It must be in the range [0, 2147483647].
+                    * Seed is supported for the stratify column.
+                Types: int
+
+            stratify_column:
+                Optional Argument.
+                Specifies the column name that contains the labels indicating
+                which data needs to be stratified for the Train-Test split.
+                Note:
+                    * Seed is supported for the stratify column.
+                Types: str
+
+            sample_id_column:
+                Optional Argument.
+                Specifies the input data column name that has the unique
+                identifier for each row in the input.
+                Note:
+                    * Mandatory when the "sample_seed" argument is present.
+                Types: str
+
+            max_time:
+                Optional Argument.
+                Specifies the maximum time, in seconds, for the completion
+                of the hyperparameter tuning execution.
+                Default Value: None
+                Types: int or float
+
+            kwargs:
+                Optional Argument.
+                Specifies the keyword arguments. Accepts additional arguments
+                required for the teradataml analytic function.
+
+        RETURNS:
+            None
+
+        RAISES:
+            TeradataMlException, TypeError, ValueError
+
+        EXAMPLES:
+            >>> # Create an instance of the RandomSearch algorithm called "optimizer_obj".
+            >>> optimizer_obj = RandomSearch(func=SVM, params=params, n_iter=3)
+
+            >>> eval_params = {"id_column": "id",
+                               "accumulate": "MedHouseVal"}
+            >>> # Example 1: Passing a single DataFrame for the model trainer function.
+            >>> optimizer_obj.fit(data=train_df,
+                                  evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 2: Passing multiple datasets as a tuple of DataFrames
+            >>> # for the model trainer function.
+            >>> optimizer_obj.fit(data=(train_df_1, train_df_2),
+                                  evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 3: Passing multiple datasets as a dictionary of DataFrames
+            >>> # for the model trainer function.
+            >>> optimizer_obj.fit(data={"Data-1":train_df_1, "Data-2":train_df_2},
+                                  evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 4: No "data" argument passed in the fit() method for the
+            >>> # model trainer function.
+            >>> # Note: The "data" argument must be passed as a model hyperparameter
+            >>> # ("params") while creating the HPT object.
+
+            >>> # Define the parameter space for model training with the "data" argument.
+            >>> params = {"data":(df1, df2),
+                          "input_columns":['MedInc', 'HouseAge', 'AveRooms',
+                                           'AveBedrms', 'Population', 'AveOccup',
+                                           'Latitude', 'Longitude'],
+                          "response_column":"MedHouseVal",
+                          "model_type":"regression",
+                          "batch_size":(11, 50, 75),
+                          "iter_max":(100, 301),
+                          "intercept":False,
+                          "learning_rate":"INVTIME",
+                          "nesterov_optimization":True,
+                          "local_sgd_iterations":1}
+
+            >>> # Create "optimizer_obj" using the RandomSearch algorithm and call the
+            >>> # fit() method without any "data" argument for the model trainer function.
+            >>> optimizer_obj.fit(evaluation_metric="MAE",
+                                  early_stop=70.9,
+                                  **eval_params)
+
+            >>> # Example 5: Do not pass the "data" argument in the fit() method for a
+            >>> # non-model trainer function.
+            >>> # Note: The "data" argument must be passed as a model hyperparameter
+            >>> # ("params") while creating the HPT object.
+            >>> optimizer_obj.fit()
+
+            >>> # Example 6: Passing "verbose" argument value '1' in the fit() method
+            >>> # to display the model log.
+            >>> optimizer_obj.fit(data=train_df, evaluation_metric="R2",
+                                  verbose=1, **eval_params)
+            completed: |████████████████████████████████████████████████████████████| 100% - 6/6
+
+            >>> # Example 7: The "max_time" argument is passed in the fit() method.
+            >>> # Model training parameters.
+            >>> model_params = {"input_columns":['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
+            ...                 "response_column":'species',
+            ...                 "max_depth":(5,10,15),
+            ...                 "lambda1":(1000.0,0.001),
+            ...                 "model_type":"Classification",
+            ...                 "seed":32,
+            ...                 "shrinkage_factor":0.1,
+            ...                 "iter_num":(5, 50)}
+            >>>
+            >>> eval_params = {"id_column": "id",
+            ...                "accumulate": "species",
+            ...                "model_type":'Classification',
+            ...                "object_order_column":['task_index', 'tree_num', 'iter', 'class_num', 'tree_order']
+            ...                }
+            >>>
+            >>> # Import the model trainer function and optimizer.
+            >>> from teradataml import XGBoost, RandomSearch
+            >>>
+            >>> # Initialize the RandomSearch optimizer with the model trainer
+            >>> # function and the parameter space required for model training.
+            >>> rs_obj = RandomSearch(func=XGBoost, params=model_params, n_iter=5)
+            >>>
+            >>> # fit() method with the "max_time" argument (in seconds) for the model trainer function.
+            >>> rs_obj.fit(data=data, max_time=30, verbose=2, **eval_params)
+            Model_id:XGBOOST_3 - Run time:28.292s - Status:PASS - ACCURACY:0.8
+            Model_id:XGBOOST_0 - Run time:28.291s - Status:PASS - ACCURACY:0.867
+            Model_id:XGBOOST_2 - Run time:28.289s - Status:PASS - ACCURACY:0.867
+            Model_id:XGBOOST_1 - Run time:28.291s - Status:PASS - ACCURACY:0.867
+            Computing: |⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾| 80% - 4/5
+            >>>
+            >>> # Status is 'SKIP' for the models that did not complete within "max_time".
+            >>> rs_obj.models
+                MODEL_ID DATA_ID                                         PARAMETERS STATUS  ACCURACY
+            0  XGBOOST_3    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.800000
+            1  XGBOOST_4    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
+            2  XGBOOST_0    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.866667
+            3  XGBOOST_2    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.866667
+            4  XGBOOST_1    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.866667
+        """
+
+        # Set the discard_invalid_column_params flag.
+        self.discard_invalid_column_params = kwargs.get("discard_invalid_column_params", False)
+
+        if self.discard_invalid_column_params:
+            # Set the model trainer input data.
+            super()._setting_model_trainer_data(data)
+            # Map the data with the input columns.
+            super()._data_mapping()
+            # Replace the hooks with no-op lambdas so the base class fit()
+            # does not repeat the data setup performed here.
+            self._setting_model_trainer_data = lambda data: None
+            self._BaseSearch__update_model_parameters = lambda: None
+
+        # Populate the parameter grid.
+        self.__populate_params_grid()
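+        # Note: the grid is populated here, after any data mapping above, so
+        # that the per-data_id grouping in __populate_params_grid() sees the
+        # mapped parameters.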
+
+        # Call the _BaseSearch class fit() method.
+        super().fit(data, evaluation_metric, early_stop,
+                    frac, run_parallel, wait, verbose,
+                    stratify_column, sample_id_column,
+                    sample_seed, max_time, **kwargs)
+