teradataml 20.0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +2762 -0
- teradataml/__init__.py +78 -0
- teradataml/_version.py +11 -0
- teradataml/analytics/Transformations.py +2996 -0
- teradataml/analytics/__init__.py +82 -0
- teradataml/analytics/analytic_function_executor.py +2416 -0
- teradataml/analytics/analytic_query_generator.py +1050 -0
- teradataml/analytics/byom/H2OPredict.py +514 -0
- teradataml/analytics/byom/PMMLPredict.py +437 -0
- teradataml/analytics/byom/__init__.py +16 -0
- teradataml/analytics/json_parser/__init__.py +133 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
- teradataml/analytics/json_parser/json_store.py +191 -0
- teradataml/analytics/json_parser/metadata.py +1666 -0
- teradataml/analytics/json_parser/utils.py +805 -0
- teradataml/analytics/meta_class.py +236 -0
- teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
- teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
- teradataml/analytics/sqle/__init__.py +128 -0
- teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
- teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
- teradataml/analytics/table_operator/__init__.py +11 -0
- teradataml/analytics/uaf/__init__.py +82 -0
- teradataml/analytics/utils.py +828 -0
- teradataml/analytics/valib.py +1617 -0
- teradataml/automl/__init__.py +5835 -0
- teradataml/automl/autodataprep/__init__.py +493 -0
- teradataml/automl/custom_json_utils.py +1625 -0
- teradataml/automl/data_preparation.py +1384 -0
- teradataml/automl/data_transformation.py +1254 -0
- teradataml/automl/feature_engineering.py +2273 -0
- teradataml/automl/feature_exploration.py +1873 -0
- teradataml/automl/model_evaluation.py +488 -0
- teradataml/automl/model_training.py +1407 -0
- teradataml/catalog/__init__.py +2 -0
- teradataml/catalog/byom.py +1759 -0
- teradataml/catalog/function_argument_mapper.py +859 -0
- teradataml/catalog/model_cataloging_utils.py +491 -0
- teradataml/clients/__init__.py +0 -0
- teradataml/clients/auth_client.py +137 -0
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/clients/pkce_client.py +481 -0
- teradataml/common/__init__.py +1 -0
- teradataml/common/aed_utils.py +2078 -0
- teradataml/common/bulk_exposed_utils.py +113 -0
- teradataml/common/constants.py +1669 -0
- teradataml/common/deprecations.py +166 -0
- teradataml/common/exceptions.py +147 -0
- teradataml/common/formula.py +743 -0
- teradataml/common/garbagecollector.py +666 -0
- teradataml/common/logger.py +1261 -0
- teradataml/common/messagecodes.py +518 -0
- teradataml/common/messages.py +262 -0
- teradataml/common/pylogger.py +67 -0
- teradataml/common/sqlbundle.py +764 -0
- teradataml/common/td_coltype_code_to_tdtype.py +48 -0
- teradataml/common/utils.py +3166 -0
- teradataml/common/warnings.py +36 -0
- teradataml/common/wrapper_utils.py +625 -0
- teradataml/config/__init__.py +0 -0
- teradataml/config/dummy_file1.cfg +5 -0
- teradataml/config/dummy_file2.cfg +3 -0
- teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
- teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
- teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
- teradataml/context/__init__.py +0 -0
- teradataml/context/aed_context.py +223 -0
- teradataml/context/context.py +1462 -0
- teradataml/data/A_loan.csv +19 -0
- teradataml/data/BINARY_REALS_LEFT.csv +11 -0
- teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
- teradataml/data/B_loan.csv +49 -0
- teradataml/data/BuoyData2.csv +17 -0
- teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
- teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
- teradataml/data/Convolve2RealsLeft.csv +5 -0
- teradataml/data/Convolve2RealsRight.csv +5 -0
- teradataml/data/Convolve2ValidLeft.csv +11 -0
- teradataml/data/Convolve2ValidRight.csv +11 -0
- teradataml/data/DFFTConv_Real_8_8.csv +65 -0
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/Mall_customer_data.csv +201 -0
- teradataml/data/Orders1_12mf.csv +25 -0
- teradataml/data/Pi_loan.csv +7 -0
- teradataml/data/SMOOTHED_DATA.csv +7 -0
- teradataml/data/TestDFFT8.csv +9 -0
- teradataml/data/TestRiver.csv +109 -0
- teradataml/data/Traindata.csv +28 -0
- teradataml/data/__init__.py +0 -0
- teradataml/data/acf.csv +17 -0
- teradataml/data/adaboost_example.json +34 -0
- teradataml/data/adaboostpredict_example.json +24 -0
- teradataml/data/additional_table.csv +11 -0
- teradataml/data/admissions_test.csv +21 -0
- teradataml/data/admissions_train.csv +41 -0
- teradataml/data/admissions_train_nulls.csv +41 -0
- teradataml/data/advertising.csv +201 -0
- teradataml/data/ageandheight.csv +13 -0
- teradataml/data/ageandpressure.csv +31 -0
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/antiselect_example.json +36 -0
- teradataml/data/antiselect_input.csv +8 -0
- teradataml/data/antiselect_input_mixed_case.csv +8 -0
- teradataml/data/applicant_external.csv +7 -0
- teradataml/data/applicant_reference.csv +7 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/arima_example.json +9 -0
- teradataml/data/assortedtext_input.csv +8 -0
- teradataml/data/attribution_example.json +34 -0
- teradataml/data/attribution_sample_table.csv +27 -0
- teradataml/data/attribution_sample_table1.csv +6 -0
- teradataml/data/attribution_sample_table2.csv +11 -0
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bank_web_clicks1.csv +43 -0
- teradataml/data/bank_web_clicks2.csv +91 -0
- teradataml/data/bank_web_url.csv +85 -0
- teradataml/data/barrier.csv +2 -0
- teradataml/data/barrier_new.csv +3 -0
- teradataml/data/betweenness_example.json +14 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/bin_breaks.csv +8 -0
- teradataml/data/bin_fit_ip.csv +4 -0
- teradataml/data/binary_complex_left.csv +11 -0
- teradataml/data/binary_complex_right.csv +11 -0
- teradataml/data/binary_matrix_complex_left.csv +21 -0
- teradataml/data/binary_matrix_complex_right.csv +21 -0
- teradataml/data/binary_matrix_real_left.csv +21 -0
- teradataml/data/binary_matrix_real_right.csv +21 -0
- teradataml/data/blood2ageandweight.csv +26 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/boston.csv +507 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/buoydata_mix.csv +11 -0
- teradataml/data/burst_data.csv +5 -0
- teradataml/data/burst_example.json +21 -0
- teradataml/data/byom_example.json +34 -0
- teradataml/data/bytes_table.csv +4 -0
- teradataml/data/cal_housing_ex_raw.csv +70 -0
- teradataml/data/callers.csv +7 -0
- teradataml/data/calls.csv +10 -0
- teradataml/data/cars_hist.csv +33 -0
- teradataml/data/cat_table.csv +25 -0
- teradataml/data/ccm_example.json +32 -0
- teradataml/data/ccm_input.csv +91 -0
- teradataml/data/ccm_input2.csv +13 -0
- teradataml/data/ccmexample.csv +101 -0
- teradataml/data/ccmprepare_example.json +9 -0
- teradataml/data/ccmprepare_input.csv +91 -0
- teradataml/data/cfilter_example.json +12 -0
- teradataml/data/changepointdetection_example.json +18 -0
- teradataml/data/changepointdetectionrt_example.json +8 -0
- teradataml/data/chi_sq.csv +3 -0
- teradataml/data/churn_data.csv +14 -0
- teradataml/data/churn_emission.csv +35 -0
- teradataml/data/churn_initial.csv +3 -0
- teradataml/data/churn_state_transition.csv +5 -0
- teradataml/data/citedges_2.csv +745 -0
- teradataml/data/citvertices_2.csv +1210 -0
- teradataml/data/clicks2.csv +16 -0
- teradataml/data/clickstream.csv +13 -0
- teradataml/data/clickstream1.csv +11 -0
- teradataml/data/closeness_example.json +16 -0
- teradataml/data/complaints.csv +21 -0
- teradataml/data/complaints_mini.csv +3 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_testtoken.csv +224 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/complaints_tokens_test.csv +353 -0
- teradataml/data/complaints_traintoken.csv +472 -0
- teradataml/data/computers_category.csv +1001 -0
- teradataml/data/computers_test1.csv +1252 -0
- teradataml/data/computers_train1.csv +5009 -0
- teradataml/data/computers_train1_clustered.csv +5009 -0
- teradataml/data/confusionmatrix_example.json +9 -0
- teradataml/data/conversion_event_table.csv +3 -0
- teradataml/data/corr_input.csv +17 -0
- teradataml/data/correlation_example.json +11 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/coxhazardratio_example.json +39 -0
- teradataml/data/coxph_example.json +15 -0
- teradataml/data/coxsurvival_example.json +28 -0
- teradataml/data/cpt.csv +41 -0
- teradataml/data/credit_ex_merged.csv +45 -0
- teradataml/data/creditcard_data.csv +1001 -0
- teradataml/data/customer_loyalty.csv +301 -0
- teradataml/data/customer_loyalty_newseq.csv +31 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +173 -0
- teradataml/data/decisionforest_example.json +37 -0
- teradataml/data/decisionforestpredict_example.json +38 -0
- teradataml/data/decisiontree_example.json +21 -0
- teradataml/data/decisiontreepredict_example.json +45 -0
- teradataml/data/dfft2_size4_real.csv +17 -0
- teradataml/data/dfft2_test_matrix16.csv +17 -0
- teradataml/data/dfft2conv_real_4_4.csv +65 -0
- teradataml/data/diabetes.csv +443 -0
- teradataml/data/diabetes_test.csv +89 -0
- teradataml/data/dict_table.csv +5 -0
- teradataml/data/docperterm_table.csv +4 -0
- teradataml/data/docs/__init__.py +1 -0
- teradataml/data/docs/byom/__init__.py +0 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
- teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
- teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
- teradataml/data/docs/byom/docs/__init__.py +0 -0
- teradataml/data/docs/sqle/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
- teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
- teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
- teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
- teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
- teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
- teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
- teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
- teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
- teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
- teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
- teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
- teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
- teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
- teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
- teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
- teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
- teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
- teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
- teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
- teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
- teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
- teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/tableoperator/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
- teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
- teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
- teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/uaf/__init__.py +0 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
- teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
- teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
- teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
- teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
- teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
- teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
- teradataml/data/dtw_example.json +18 -0
- teradataml/data/dtw_t1.csv +11 -0
- teradataml/data/dtw_t2.csv +4 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt2d_example.json +16 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_example.json +15 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/dwt_filter_dim.csv +5 -0
- teradataml/data/emission.csv +9 -0
- teradataml/data/emp_table_by_dept.csv +19 -0
- teradataml/data/employee_info.csv +4 -0
- teradataml/data/employee_table.csv +6 -0
- teradataml/data/excluding_event_table.csv +2 -0
- teradataml/data/finance_data.csv +6 -0
- teradataml/data/finance_data2.csv +61 -0
- teradataml/data/finance_data3.csv +93 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/fish.csv +160 -0
- teradataml/data/fm_blood2ageandweight.csv +26 -0
- teradataml/data/fmeasure_example.json +12 -0
- teradataml/data/followers_leaders.csv +10 -0
- teradataml/data/fpgrowth_example.json +12 -0
- teradataml/data/frequentpaths_example.json +29 -0
- teradataml/data/friends.csv +9 -0
- teradataml/data/fs_input.csv +33 -0
- teradataml/data/fs_input1.csv +33 -0
- teradataml/data/genData.csv +513 -0
- teradataml/data/geodataframe_example.json +40 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/glm_admissions_model.csv +12 -0
- teradataml/data/glm_example.json +56 -0
- teradataml/data/glml1l2_example.json +28 -0
- teradataml/data/glml1l2predict_example.json +54 -0
- teradataml/data/glmpredict_example.json +54 -0
- teradataml/data/gq_t1.csv +21 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/hconvolve_complex_right.csv +5 -0
- teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
- teradataml/data/histogram_example.json +12 -0
- teradataml/data/hmmdecoder_example.json +79 -0
- teradataml/data/hmmevaluator_example.json +25 -0
- teradataml/data/hmmsupervised_example.json +10 -0
- teradataml/data/hmmunsupervised_example.json +8 -0
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/house_values.csv +12 -0
- teradataml/data/house_values2.csv +13 -0
- teradataml/data/housing_cat.csv +7 -0
- teradataml/data/housing_data.csv +9 -0
- teradataml/data/housing_test.csv +47 -0
- teradataml/data/housing_test_binary.csv +47 -0
- teradataml/data/housing_train.csv +493 -0
- teradataml/data/housing_train_attribute.csv +5 -0
- teradataml/data/housing_train_binary.csv +437 -0
- teradataml/data/housing_train_parameter.csv +2 -0
- teradataml/data/housing_train_response.csv +493 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/ibm_stock.csv +370 -0
- teradataml/data/ibm_stock1.csv +370 -0
- teradataml/data/identitymatch_example.json +22 -0
- teradataml/data/idf_table.csv +4 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/impressions.csv +101 -0
- teradataml/data/inflation.csv +21 -0
- teradataml/data/initial.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/insect_sprays.csv +13 -0
- teradataml/data/insurance.csv +1339 -0
- teradataml/data/interpolator_example.json +13 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/iris_altinput.csv +481 -0
- teradataml/data/iris_attribute_output.csv +8 -0
- teradataml/data/iris_attribute_test.csv +121 -0
- teradataml/data/iris_attribute_train.csv +481 -0
- teradataml/data/iris_category_expect_predict.csv +31 -0
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/iris_input.csv +151 -0
- teradataml/data/iris_response_train.csv +121 -0
- teradataml/data/iris_test.csv +31 -0
- teradataml/data/iris_train.csv +121 -0
- teradataml/data/join_table1.csv +4 -0
- teradataml/data/join_table2.csv +4 -0
- teradataml/data/jsons/anly_function_name.json +7 -0
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/byom/dataikupredict.json +148 -0
- teradataml/data/jsons/byom/datarobotpredict.json +147 -0
- teradataml/data/jsons/byom/h2opredict.json +195 -0
- teradataml/data/jsons/byom/onnxembeddings.json +267 -0
- teradataml/data/jsons/byom/onnxpredict.json +187 -0
- teradataml/data/jsons/byom/pmmlpredict.json +147 -0
- teradataml/data/jsons/paired_functions.json +450 -0
- teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
- teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
- teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
- teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
- teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
- teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
- teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
- teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
- teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
- teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
- teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
- teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
- teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
- teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
- teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
- teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
- teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
- teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
- teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
- teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
- teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
- teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
- teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
- teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
- teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
- teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
- teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
- teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
- teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/kmeans_example.json +23 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/kmeans_us_arrests_data.csv +51 -0
- teradataml/data/knn_example.json +19 -0
- teradataml/data/knnrecommender_example.json +7 -0
- teradataml/data/knnrecommenderpredict_example.json +12 -0
- teradataml/data/lar_example.json +17 -0
- teradataml/data/larpredict_example.json +30 -0
- teradataml/data/lc_new_predictors.csv +5 -0
- teradataml/data/lc_new_reference.csv +9 -0
- teradataml/data/lda_example.json +9 -0
- teradataml/data/ldainference_example.json +15 -0
- teradataml/data/ldatopicsummary_example.json +9 -0
- teradataml/data/levendist_input.csv +13 -0
- teradataml/data/levenshteindistance_example.json +10 -0
- teradataml/data/linreg_example.json +10 -0
- teradataml/data/load_example_data.py +350 -0
- teradataml/data/loan_prediction.csv +295 -0
- teradataml/data/lungcancer.csv +138 -0
- teradataml/data/mappingdata.csv +12 -0
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/milk_timeseries.csv +157 -0
- teradataml/data/min_max_titanic.csv +4 -0
- teradataml/data/minhash_example.json +6 -0
- teradataml/data/ml_ratings.csv +7547 -0
- teradataml/data/ml_ratings_10.csv +2445 -0
- teradataml/data/mobile_data.csv +13 -0
- teradataml/data/model1_table.csv +5 -0
- teradataml/data/model2_table.csv +5 -0
- teradataml/data/models/License_file.txt +1 -0
- teradataml/data/models/License_file_empty.txt +0 -0
- teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
- teradataml/data/models/dr_iris_rf +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
- teradataml/data/models/iris_db_glm_model.pmml +57 -0
- teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
- teradataml/data/models/iris_kmeans_model +0 -0
- teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
- teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
- teradataml/data/modularity_example.json +12 -0
- teradataml/data/movavg_example.json +8 -0
- teradataml/data/mtx1.csv +7 -0
- teradataml/data/mtx2.csv +13 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/mvdfft8.csv +9 -0
- teradataml/data/naivebayes_example.json +10 -0
- teradataml/data/naivebayespredict_example.json +19 -0
- teradataml/data/naivebayestextclassifier2_example.json +7 -0
- teradataml/data/naivebayestextclassifier_example.json +8 -0
- teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
- teradataml/data/name_Find_configure.csv +10 -0
- teradataml/data/namedentityfinder_example.json +14 -0
- teradataml/data/namedentityfinderevaluator_example.json +10 -0
- teradataml/data/namedentityfindertrainer_example.json +6 -0
- teradataml/data/nb_iris_input_test.csv +31 -0
- teradataml/data/nb_iris_input_train.csv +121 -0
- teradataml/data/nbp_iris_model.csv +13 -0
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_extractor_text.csv +2 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/ner_sports_test2.csv +29 -0
- teradataml/data/ner_sports_train.csv +501 -0
- teradataml/data/nerevaluator_example.json +6 -0
- teradataml/data/nerextractor_example.json +18 -0
- teradataml/data/nermem_sports_test.csv +18 -0
- teradataml/data/nermem_sports_train.csv +51 -0
- teradataml/data/nertrainer_example.json +7 -0
- teradataml/data/ngrams_example.json +7 -0
- teradataml/data/notebooks/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
- teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
- teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
- teradataml/data/npath_example.json +23 -0
- teradataml/data/ntree_example.json +14 -0
- teradataml/data/numeric_strings.csv +5 -0
- teradataml/data/numerics.csv +4 -0
- teradataml/data/ocean_buoy.csv +17 -0
- teradataml/data/ocean_buoy2.csv +17 -0
- teradataml/data/ocean_buoys.csv +28 -0
- teradataml/data/ocean_buoys2.csv +10 -0
- teradataml/data/ocean_buoys_nonpti.csv +28 -0
- teradataml/data/ocean_buoys_seq.csv +29 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +92 -0
- teradataml/data/optional_event_table.csv +4 -0
- teradataml/data/orders1.csv +11 -0
- teradataml/data/orders1_12.csv +13 -0
- teradataml/data/orders_ex.csv +4 -0
- teradataml/data/pack_example.json +9 -0
- teradataml/data/package_tracking.csv +19 -0
- teradataml/data/package_tracking_pti.csv +19 -0
- teradataml/data/pagerank_example.json +13 -0
- teradataml/data/paragraphs_input.csv +6 -0
- teradataml/data/pathanalyzer_example.json +8 -0
- teradataml/data/pathgenerator_example.json +8 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/phrases.csv +7 -0
- teradataml/data/pivot_example.json +9 -0
- teradataml/data/pivot_input.csv +22 -0
- teradataml/data/playerRating.csv +31 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/postagger_example.json +7 -0
- teradataml/data/posttagger_output.csv +44 -0
- teradataml/data/production_data.csv +17 -0
- teradataml/data/production_data2.csv +7 -0
- teradataml/data/randomsample_example.json +32 -0
- teradataml/data/randomwalksample_example.json +9 -0
- teradataml/data/rank_table.csv +6 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/ref_mobile_data.csv +4 -0
- teradataml/data/ref_mobile_data_dense.csv +2 -0
- teradataml/data/ref_url.csv +17 -0
- teradataml/data/restaurant_reviews.csv +7 -0
- teradataml/data/retail_churn_table.csv +27772 -0
- teradataml/data/river_data.csv +145 -0
- teradataml/data/roc_example.json +8 -0
- teradataml/data/roc_input.csv +101 -0
- teradataml/data/rule_inputs.csv +6 -0
- teradataml/data/rule_table.csv +2 -0
- teradataml/data/sales.csv +7 -0
- teradataml/data/sales_transaction.csv +501 -0
- teradataml/data/salesdata.csv +342 -0
- teradataml/data/sample_cities.csv +3 -0
- teradataml/data/sample_shapes.csv +11 -0
- teradataml/data/sample_streets.csv +3 -0
- teradataml/data/sampling_example.json +16 -0
- teradataml/data/sax_example.json +17 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +74 -0
- teradataml/data/scale_housing.csv +11 -0
- teradataml/data/scale_housing_test.csv +6 -0
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scale_stat.csv +11 -0
- teradataml/data/scalebypartition_example.json +13 -0
- teradataml/data/scalemap_example.json +13 -0
- teradataml/data/scalesummary_example.json +12 -0
- teradataml/data/score_category.csv +101 -0
- teradataml/data/score_summary.csv +4 -0
- teradataml/data/script_example.json +10 -0
- teradataml/data/scripts/deploy_script.py +84 -0
- teradataml/data/scripts/lightgbm/dataset.template +175 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/mapper.py +16 -0
- teradataml/data/scripts/mapper_replace.py +16 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/seeds.csv +10 -0
- teradataml/data/sentenceextractor_example.json +7 -0
- teradataml/data/sentiment_extract_input.csv +11 -0
- teradataml/data/sentiment_train.csv +16 -0
- teradataml/data/sentiment_word.csv +20 -0
- teradataml/data/sentiment_word_input.csv +20 -0
- teradataml/data/sentimentextractor_example.json +24 -0
- teradataml/data/sentimenttrainer_example.json +8 -0
- teradataml/data/sequence_table.csv +10 -0
- teradataml/data/seriessplitter_example.json +8 -0
- teradataml/data/sessionize_example.json +17 -0
- teradataml/data/sessionize_table.csv +116 -0
- teradataml/data/setop_test1.csv +24 -0
- teradataml/data/setop_test2.csv +22 -0
- teradataml/data/soc_nw_edges.csv +11 -0
- teradataml/data/soc_nw_vertices.csv +8 -0
- teradataml/data/souvenir_timeseries.csv +168 -0
- teradataml/data/sparse_iris_attribute.csv +5 -0
- teradataml/data/sparse_iris_test.csv +121 -0
- teradataml/data/sparse_iris_train.csv +601 -0
- teradataml/data/star1.csv +6 -0
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/state_transition.csv +5 -0
- teradataml/data/stock_data.csv +53 -0
- teradataml/data/stock_movement.csv +11 -0
- teradataml/data/stock_vol.csv +76 -0
- teradataml/data/stop_words.csv +8 -0
- teradataml/data/store_sales.csv +37 -0
- teradataml/data/stringsimilarity_example.json +8 -0
- teradataml/data/strsimilarity_input.csv +13 -0
- teradataml/data/students.csv +101 -0
- teradataml/data/svm_iris_input_test.csv +121 -0
- teradataml/data/svm_iris_input_train.csv +481 -0
- teradataml/data/svm_iris_model.csv +7 -0
- teradataml/data/svmdense_example.json +10 -0
- teradataml/data/svmdensepredict_example.json +19 -0
- teradataml/data/svmsparse_example.json +8 -0
- teradataml/data/svmsparsepredict_example.json +14 -0
- teradataml/data/svmsparsesummary_example.json +8 -0
- teradataml/data/target_mobile_data.csv +13 -0
- teradataml/data/target_mobile_data_dense.csv +5 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/templatedata.csv +1201 -0
- teradataml/data/templates/open_source_ml.json +11 -0
- teradataml/data/teradata_icon.ico +0 -0
- teradataml/data/teradataml_example.json +1473 -0
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_loan_prediction.csv +53 -0
- teradataml/data/test_pacf_12.csv +37 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/test_river2.csv +109 -0
- teradataml/data/text_inputs.csv +6 -0
- teradataml/data/textchunker_example.json +8 -0
- teradataml/data/textclassifier_example.json +7 -0
- teradataml/data/textclassifier_input.csv +7 -0
- teradataml/data/textclassifiertrainer_example.json +7 -0
- teradataml/data/textmorph_example.json +11 -0
- teradataml/data/textparser_example.json +15 -0
- teradataml/data/texttagger_example.json +12 -0
- teradataml/data/texttokenizer_example.json +7 -0
- teradataml/data/texttrainer_input.csv +11 -0
- teradataml/data/tf_example.json +7 -0
- teradataml/data/tfidf_example.json +14 -0
- teradataml/data/tfidf_input1.csv +201 -0
- teradataml/data/tfidf_train.csv +6 -0
- teradataml/data/time_table1.csv +535 -0
- teradataml/data/time_table2.csv +14 -0
- teradataml/data/timeseriesdata.csv +1601 -0
- teradataml/data/timeseriesdatasetsd4.csv +105 -0
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic.csv +892 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/token_table.csv +696 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/train_tracking.csv +28 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/transformation_table.csv +6 -0
- teradataml/data/transformation_table_new.csv +2 -0
- teradataml/data/tv_spots.csv +16 -0
- teradataml/data/twod_climate_data.csv +117 -0
- teradataml/data/uaf_example.json +529 -0
- teradataml/data/univariatestatistics_example.json +9 -0
- teradataml/data/unpack_example.json +10 -0
- teradataml/data/unpivot_example.json +25 -0
- teradataml/data/unpivot_input.csv +8 -0
- teradataml/data/url_data.csv +10 -0
- teradataml/data/us_air_pass.csv +37 -0
- teradataml/data/us_population.csv +624 -0
- teradataml/data/us_states_shapes.csv +52 -0
- teradataml/data/varmax_example.json +18 -0
- teradataml/data/vectordistance_example.json +30 -0
- teradataml/data/ville_climatedata.csv +121 -0
- teradataml/data/ville_tempdata.csv +12 -0
- teradataml/data/ville_tempdata1.csv +12 -0
- teradataml/data/ville_temperature.csv +11 -0
- teradataml/data/waveletTable.csv +1605 -0
- teradataml/data/waveletTable2.csv +1605 -0
- teradataml/data/weightedmovavg_example.json +9 -0
- teradataml/data/wft_testing.csv +5 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/data/word_embed_input_table1.csv +6 -0
- teradataml/data/word_embed_input_table2.csv +5 -0
- teradataml/data/word_embed_model.csv +23 -0
- teradataml/data/words_input.csv +13 -0
- teradataml/data/xconvolve_complex_left.csv +6 -0
- teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
- teradataml/data/xgboost_example.json +36 -0
- teradataml/data/xgboostpredict_example.json +32 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/__init__.py +0 -0
- teradataml/dataframe/copy_to.py +2446 -0
- teradataml/dataframe/data_transfer.py +2840 -0
- teradataml/dataframe/dataframe.py +20908 -0
- teradataml/dataframe/dataframe_utils.py +2114 -0
- teradataml/dataframe/fastload.py +794 -0
- teradataml/dataframe/functions.py +2110 -0
- teradataml/dataframe/indexer.py +424 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +1171 -0
- teradataml/dataframe/sql.py +10904 -0
- teradataml/dataframe/sql_function_parameters.py +440 -0
- teradataml/dataframe/sql_functions.py +652 -0
- teradataml/dataframe/sql_interfaces.py +220 -0
- teradataml/dataframe/vantage_function_types.py +675 -0
- teradataml/dataframe/window.py +694 -0
- teradataml/dbutils/__init__.py +3 -0
- teradataml/dbutils/dbutils.py +2871 -0
- teradataml/dbutils/filemgr.py +318 -0
- teradataml/gen_ai/__init__.py +2 -0
- teradataml/gen_ai/convAI.py +473 -0
- teradataml/geospatial/__init__.py +4 -0
- teradataml/geospatial/geodataframe.py +1105 -0
- teradataml/geospatial/geodataframecolumn.py +392 -0
- teradataml/geospatial/geometry_types.py +926 -0
- teradataml/hyperparameter_tuner/__init__.py +1 -0
- teradataml/hyperparameter_tuner/optimizer.py +4115 -0
- teradataml/hyperparameter_tuner/utils.py +303 -0
- teradataml/lib/__init__.py +0 -0
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/lib/libaed_0_1_ppc64le.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/_base.py +1321 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/_constants.py +61 -0
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +267 -0
- teradataml/options/__init__.py +148 -0
- teradataml/options/configure.py +489 -0
- teradataml/options/display.py +187 -0
- teradataml/plot/__init__.py +3 -0
- teradataml/plot/axis.py +1427 -0
- teradataml/plot/constants.py +15 -0
- teradataml/plot/figure.py +431 -0
- teradataml/plot/plot.py +810 -0
- teradataml/plot/query_generator.py +83 -0
- teradataml/plot/subplot.py +216 -0
- teradataml/scriptmgmt/UserEnv.py +4273 -0
- teradataml/scriptmgmt/__init__.py +3 -0
- teradataml/scriptmgmt/lls_utils.py +2157 -0
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +900 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +409 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/series/__init__.py +0 -0
- teradataml/series/series.py +537 -0
- teradataml/series/series_utils.py +71 -0
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +658 -0
- teradataml/store/feature_store/feature_store.py +4814 -0
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +7330 -0
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/Apply.py +979 -0
- teradataml/table_operators/Script.py +1739 -0
- teradataml/table_operators/TableOperator.py +1343 -0
- teradataml/table_operators/__init__.py +2 -0
- teradataml/table_operators/apply_query_generator.py +262 -0
- teradataml/table_operators/query_generator.py +493 -0
- teradataml/table_operators/table_operator_query_generator.py +462 -0
- teradataml/table_operators/table_operator_util.py +726 -0
- teradataml/table_operators/templates/dataframe_apply.template +184 -0
- teradataml/table_operators/templates/dataframe_map.template +176 -0
- teradataml/table_operators/templates/dataframe_register.template +73 -0
- teradataml/table_operators/templates/dataframe_udf.template +67 -0
- teradataml/table_operators/templates/script_executor.template +170 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +53 -0
- teradataml/utils/__init__.py +0 -0
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +943 -0
- teradataml/utils/internal_buffer.py +122 -0
- teradataml/utils/print_versions.py +206 -0
- teradataml/utils/utils.py +451 -0
- teradataml/utils/validators.py +3305 -0
- teradataml-20.0.0.8.dist-info/METADATA +2804 -0
- teradataml-20.0.0.8.dist-info/RECORD +1208 -0
- teradataml-20.0.0.8.dist-info/WHEEL +5 -0
- teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
- teradataml-20.0.0.8.dist-info/zip-safe +1 -0
|
@@ -0,0 +1,1407 @@
|
|
|
1
|
+
# ##################################################################
|
|
2
|
+
#
|
|
3
|
+
# Copyright 2025 Teradata. All rights reserved.
|
|
4
|
+
# TERADATA CONFIDENTIAL AND TRADE SECRET
|
|
5
|
+
#
|
|
6
|
+
# Primary Owner: Sweta Shaw
|
|
7
|
+
# Email Id: Sweta.Shaw@Teradata.com
|
|
8
|
+
#
|
|
9
|
+
# Secondary Owner: Akhil Bisht
|
|
10
|
+
# Email Id: AKHIL.BISHT@Teradata.com
|
|
11
|
+
#
|
|
12
|
+
# Version: 1.1
|
|
13
|
+
# Function Version: 1.0
|
|
14
|
+
# ##################################################################
|
|
15
|
+
|
|
16
|
+
# Python libraries
|
|
17
|
+
import concurrent.futures
|
|
18
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
19
|
+
import math
|
|
20
|
+
import pandas as pd
|
|
21
|
+
from itertools import product
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
# Teradata libraries
|
|
25
|
+
from teradataml.context import context as tdmlctx
|
|
26
|
+
from teradataml.dataframe.copy_to import copy_to_sql
|
|
27
|
+
from teradataml.dataframe.dataframe import DataFrame
|
|
28
|
+
from teradataml import execute_sql, get_connection
|
|
29
|
+
from teradataml import configure, SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
|
|
30
|
+
from teradataml.utils.validators import _Validators
|
|
31
|
+
from teradataml.common.utils import UtilFuncs
|
|
32
|
+
from teradataml.common.constants import TeradataConstants, AutoMLConstants
|
|
33
|
+
|
|
34
|
+
class _ModelTraining:
|
|
35
|
+
|
|
36
|
+
def __init__(self,
|
|
37
|
+
data,
|
|
38
|
+
target_column,
|
|
39
|
+
id_column,
|
|
40
|
+
model_list,
|
|
41
|
+
verbose=0,
|
|
42
|
+
features=None,
|
|
43
|
+
task_type="Regression",
|
|
44
|
+
custom_data = None,
|
|
45
|
+
data_transform_dict=None,
|
|
46
|
+
**kwargs):
|
|
47
|
+
"""
|
|
48
|
+
DESCRIPTION:
|
|
49
|
+
Function initializes the data, target column, features and models
|
|
50
|
+
for model training.
|
|
51
|
+
|
|
52
|
+
PARAMETERS:
|
|
53
|
+
data:
|
|
54
|
+
Required Argument.
|
|
55
|
+
Specifies the dataset for model training phase.
|
|
56
|
+
Types: teradataml Dataframe
|
|
57
|
+
|
|
58
|
+
target_column:
|
|
59
|
+
Required Argument. (Not required for Clustering task_type)
|
|
60
|
+
Specifies the target column present inside the dataset.
|
|
61
|
+
Types: str
|
|
62
|
+
|
|
63
|
+
id_column:
|
|
64
|
+
Required Argument.
|
|
65
|
+
Specifies the name of the unique identifier column in "data".
|
|
66
|
+
Types: str
|
|
67
|
+
|
|
68
|
+
model_list:
|
|
69
|
+
Required Argument.
|
|
70
|
+
Specifies the list of models to be used for model training.
|
|
71
|
+
Types: list
|
|
72
|
+
|
|
73
|
+
verbose:
|
|
74
|
+
Optional Argument.
|
|
75
|
+
Specifies the detailed execution steps based on verbose level.
|
|
76
|
+
Default Value: 0
|
|
77
|
+
Permitted Values:
|
|
78
|
+
* 0: prints the progress bar and leaderboard
|
|
79
|
+
* 1: prints the execution steps of AutoML.
|
|
80
|
+
* 2: prints the intermediate data between the
|
|
81
|
+
execution of each step of AutoML.
|
|
82
|
+
Types: int
|
|
83
|
+
|
|
84
|
+
features:
|
|
85
|
+
Required Argument.
|
|
86
|
+
Specifies the list of selected feature by rfe, lasso and pca
|
|
87
|
+
respectively in this order.
|
|
88
|
+
Types: list of list of strings (str)
|
|
89
|
+
|
|
90
|
+
task_type:
|
|
91
|
+
Required Argument.
|
|
92
|
+
Specifies the task type for AutoML, whether to apply regresion
|
|
93
|
+
or classification or clustering on the provived dataset.
|
|
94
|
+
Default Value: "Regression"
|
|
95
|
+
Permitted Values: "Regression", "Classification", "Clustering"
|
|
96
|
+
Types: str
|
|
97
|
+
|
|
98
|
+
custom_data:
|
|
99
|
+
Optional Argument.
|
|
100
|
+
Specifies json object containing user customized input.
|
|
101
|
+
Types: json object
|
|
102
|
+
|
|
103
|
+
data_transform_dict:
|
|
104
|
+
Optional Argument.
|
|
105
|
+
Specifies a dictionary containing data transformation parameters.
|
|
106
|
+
Types: dict
|
|
107
|
+
|
|
108
|
+
**kwargs:
|
|
109
|
+
Specifies the additional arguments for model training. Below
|
|
110
|
+
are the additional arguments:
|
|
111
|
+
volatile:
|
|
112
|
+
Optional Argument.
|
|
113
|
+
Specifies whether to put the interim results of the
|
|
114
|
+
functions in a volatile table or not. When set to
|
|
115
|
+
True, results are stored in a volatile table,
|
|
116
|
+
otherwise not.
|
|
117
|
+
Default Value: False
|
|
118
|
+
Types: bool
|
|
119
|
+
|
|
120
|
+
persist:
|
|
121
|
+
Optional Argument.
|
|
122
|
+
Specifies whether to persist the interim results of the
|
|
123
|
+
functions in a table or not. When set to True,
|
|
124
|
+
results are persisted in a table; otherwise,
|
|
125
|
+
results are garbage collected at the end of the
|
|
126
|
+
session.
|
|
127
|
+
Default Value: False
|
|
128
|
+
Types: bool
|
|
129
|
+
|
|
130
|
+
seed:
|
|
131
|
+
Optional Argument.
|
|
132
|
+
Specifies the random seed for reproducibility.
|
|
133
|
+
Default Value: 42
|
|
134
|
+
Types: int
|
|
135
|
+
|
|
136
|
+
cluster:
|
|
137
|
+
Optional Argument.
|
|
138
|
+
Specifies whether to apply clustering techniques.
|
|
139
|
+
Default Value: False
|
|
140
|
+
Types: bool
|
|
141
|
+
|
|
142
|
+
enable_lasso:
|
|
143
|
+
Optional Argument.
|
|
144
|
+
Specifies whether to use lasso regression for feature selection.
|
|
145
|
+
By default, only RFE and PCA are used for feature selection.
|
|
146
|
+
Default Value: False
|
|
147
|
+
Types: bool
|
|
148
|
+
|
|
149
|
+
RETURNS:
|
|
150
|
+
None
|
|
151
|
+
|
|
152
|
+
RAISES:
|
|
153
|
+
None
|
|
154
|
+
|
|
155
|
+
EXAMPLES:
|
|
156
|
+
>>> rfe_features = ["feature1", "feature2", "feature3"]
|
|
157
|
+
>>> lasso_features = ["feature1", "feature4", "feature5"]
|
|
158
|
+
>>> pca_features = ["pc1", "pc2", "pc3"]
|
|
159
|
+
>>> feature_lists = [rfe_features, lasso_features, pca_features]
|
|
160
|
+
>>> model_trainer = _ModelTraining(data=scaled_df,
|
|
161
|
+
... target_column="target",
|
|
162
|
+
... id_column="id",
|
|
163
|
+
... model_list=["xgboost", "randomforest"],
|
|
164
|
+
... verbose=1,
|
|
165
|
+
... features=feature_lists,
|
|
166
|
+
... task_type="Classification",
|
|
167
|
+
... persist=True,
|
|
168
|
+
... seed=42)
|
|
169
|
+
"""
|
|
170
|
+
self.data = data
|
|
171
|
+
self.target_column = target_column
|
|
172
|
+
self.id_column = id_column
|
|
173
|
+
self.model_list = model_list
|
|
174
|
+
self.verbose = verbose
|
|
175
|
+
self.task_type = task_type
|
|
176
|
+
self.custom_data = custom_data
|
|
177
|
+
self.labels = self.data.drop_duplicate(self.target_column).size
|
|
178
|
+
self.startify_col = None
|
|
179
|
+
self.persist = kwargs.get("persist", False)
|
|
180
|
+
self.volatile = kwargs.get("volatile", False)
|
|
181
|
+
self.seed = kwargs.get("seed", 42)
|
|
182
|
+
self.cluster = kwargs.get("cluster", False)
|
|
183
|
+
self.enable_lasso = kwargs.get("enable_lasso", False)
|
|
184
|
+
|
|
185
|
+
self.data_transform_dict = data_transform_dict if data_transform_dict is not None else {}
|
|
186
|
+
|
|
187
|
+
if not self.cluster:
|
|
188
|
+
# For non-clustering: include lasso features only if lasso selection is enabled
|
|
189
|
+
self.features = tuple(features[:3 if self.enable_lasso else 2])
|
|
190
|
+
else:
|
|
191
|
+
# For clustering: always use first two feature sets (pca and non pca)
|
|
192
|
+
self.features = tuple(features[:2])
|
|
193
|
+
|
|
194
|
+
def model_training(self,
                   auto=True,
                   max_runtime_secs=None,
                   stopping_metric=None,
                   stopping_tolerance=0,
                   max_models=None):
    """
    DESCRIPTION:
        Runs the model training phase:
            1. Generates the hyperparameters for the different ML models.
            2. Applies user customized hyperparameters when not in auto mode.
            3. Performs hyperparameter tuning for the ML models.
            4. Displays the leaderboard of trained ML models.

    PARAMETERS:
        auto:
            Optional Argument.
            Specifies whether to run in auto mode or custom mode.
            When set to True the generated hyperparameters are used as
            they are, otherwise user input is applied on top of them.
            Default Value: True
            Types: bool

        max_runtime_secs:
            Optional Argument.
            Specifies the time limit in seconds for model training.
            Types: int

        stopping_metric:
            Required, when "stopping_tolerance" is set, otherwise optional.
            Specifies the stopping metric for stopping tolerance in model training.
            Types: str

        stopping_tolerance:
            Required, when "stopping_metric" is set, otherwise optional.
            Specifies the stopping tolerance for stopping metric in model training.
            Default Value: 0
            Types: float

        max_models:
            Optional Argument.
            Specifies the maximum number of models to be trained. When
            set, it is validated against the size of the generated
            hyperparameter space.
            Types: int

    RETURNS:
        tuple containing the sorted models information, the leaderboard,
        the data transformation dictionary and the target column
        distinct count.

    RAISES:
        None

    EXAMPLES:
        >>> model_trainer = _ModelTraining(data=scaled_df,
        ...                                target_column="target",
        ...                                id_column="id",
        ...                                model_list=["xgboost", "randomforest"],
        ...                                features=feature_lists,
        ...                                task_type="Classification")
        >>> model_info, leaderboard, data_transformation_params, \
        ...     target_count = model_trainer.model_training(auto=True, max_models=10)
    """
    # Keep the limits on the instance so the helper methods can read them.
    self.max_models = max_models
    self.max_runtime_secs = max_runtime_secs
    self.stopping_tolerance = stopping_tolerance
    self.stopping_metric = stopping_metric

    self._display_heading(phase=3, progress_bar=self.progress_bar)
    self._display_msg(msg='Model Training started ...',
                      progress_bar=self.progress_bar,
                      show_data=True)

    # Default hyperparameters, optionally overridden by user input.
    hyperparameter_sets = self._generate_parameter()
    if not auto:
        hyperparameter_sets = self._custom_hyperparameters(hyperparameter_sets)

    # max_models may not exceed the total number of model combinations.
    if self.max_models is not None:
        self._validate_upper_limit_for_max_models(hyperparameter_sets)

    if self.verbose == 2:
        self._display_hyperparameters(hyperparameter_sets)

    # Hyperparameter tuning, then ranking of whatever got trained.
    training_results = self._parallel_training(hyperparameter_sets)
    leader_board, ranked_models = self._display_leaderboard(training_results)

    self._display_heading(phase=4,
                          progress_bar=self.progress_bar)
    self.progress_bar.update()

    return ranked_models, leader_board, self.data_transform_dict, self.labels
|
|
285
|
+
|
|
286
|
+
def _get_model_param_space(self,
|
|
287
|
+
hyperparameters):
|
|
288
|
+
"""
|
|
289
|
+
DESCRIPTION:
|
|
290
|
+
Internal function to calculate the total number of models to be trained for specific model.
|
|
291
|
+
|
|
292
|
+
PARAMETERS:
|
|
293
|
+
hyperparameters:
|
|
294
|
+
Required Argument.
|
|
295
|
+
Specifies the hyperparameters availables for ML model.
|
|
296
|
+
Types: list of dict
|
|
297
|
+
|
|
298
|
+
RETURNS:
|
|
299
|
+
int containing, total number of models available for training.
|
|
300
|
+
|
|
301
|
+
RAISES:
|
|
302
|
+
None
|
|
303
|
+
|
|
304
|
+
EXAMPLES:
|
|
305
|
+
>>> models_count = self._get_model_param_space(hyperparameters={'name': 'xgboost', 'max_depth': (5, 7, 10)})
|
|
306
|
+
"""
|
|
307
|
+
# Creating all possible combinations of hyperparameters
|
|
308
|
+
if 'param_grid' in hyperparameters:
|
|
309
|
+
grid = hyperparameters['param_grid']
|
|
310
|
+
else:
|
|
311
|
+
# AutoML style: full dict is hyperparameter space
|
|
312
|
+
grid = hyperparameters
|
|
313
|
+
all_combinations = list(product(*[v if isinstance(v, (list, tuple)) else [v] for v in grid.values()]))
|
|
314
|
+
# Getting total number of models for each model model training function
|
|
315
|
+
total_models = len(all_combinations)
|
|
316
|
+
return total_models
|
|
317
|
+
|
|
318
|
+
def _validate_upper_limit_for_max_models(self,
                                         hyperparameters_list):
    """
    DESCRIPTION:
        Internal function to validate that "max_models" does not exceed
        the combined hyperparameter space of all models.

    PARAMETERS:
        hyperparameters_list:
            Required Argument.
            Specifies the hyperparameters for different ML models.
            Types: list of dict

    RETURNS:
        None

    RAISES:
        TeradataMlException, ValueError

    EXAMPLES:
        >>> self._validate_upper_limit_for_max_models(hyperparameters_list=[{'name': 'xgboost', 'max_depth': (5, 7)}, {'name': 'svm', 'lambda1': (0.01, 0.1)}])
    """
    # Upper bound: sum of the per-model combination counts.
    model_param_space = sum(self._get_model_param_space(model_params)
                            for model_params in hyperparameters_list)

    # Range check is delegated to the shared validator (inclusive bound).
    _Validators._validate_argument_range(self.max_models,
                                         "max_models",
                                         ubound=model_param_space,
                                         ubound_inclusive=True)
|
|
347
|
+
|
|
348
|
+
def _display_hyperparameters(self,
                             hyperparameters_list):
    """
    DESCRIPTION:
        Internal function to display, for every ML model, its
        hyperparameters and the number of models they expand to.

    PARAMETERS:
        hyperparameters_list:
            Required Argument.
            Specifies the hyperparameters for different ML models.
            Types: list of dict

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._display_hyperparameters(hyperparameters_list=[{'name': 'xgboost', 'max_depth': (5, 7)}, {'name': 'svm', 'lambda1': (0.01, 0.1)}])
    """
    self._display_msg(msg="Hyperparameters used for model training: ",
                      progress_bar=self.progress_bar,
                      show_data=True)
    # Overwrite the current console line with blanks.
    print(" " *150, end='\r', flush=True)

    for model_params in hyperparameters_list:
        model_label = model_params.get("name", "Unnamed Model")
        self._display_msg(msg=f"Model: {model_label}")

        # For clustering only the nested grid is shown; otherwise the
        # whole dict is printed.
        grid_only = self.cluster and "param_grid" in model_params
        if grid_only:
            details = f"Hyperparameter Grid: {model_params['param_grid']}"
        else:
            details = f"Hyperparameters: {model_params}"
        self._display_msg(msg=details)

        model_count = self._get_model_param_space(model_params)
        self._display_msg(msg=f"Total number of models for {model_label}: {model_count}")

        # Separator line between two models.
        print("--" * 100 + "\n")
|
|
389
|
+
|
|
390
|
+
def _display_leaderboard(self,
|
|
391
|
+
trained_models_info):
|
|
392
|
+
"""
|
|
393
|
+
DESCRIPTION:
|
|
394
|
+
Internal function to display the trainined ML models.
|
|
395
|
+
|
|
396
|
+
PARAMETERS:
|
|
397
|
+
trained_models_info:
|
|
398
|
+
Required Argument.
|
|
399
|
+
Specifies the trained models information to display.
|
|
400
|
+
Types: pandas Dataframe
|
|
401
|
+
|
|
402
|
+
RETURNS:
|
|
403
|
+
tuple containing, leaderboard as pandas Dataframe and sorted models as pandas Dataframe
|
|
404
|
+
|
|
405
|
+
RAISES:
|
|
406
|
+
None
|
|
407
|
+
|
|
408
|
+
EXAMPLES:
|
|
409
|
+
>>> leaderboard, sorted_model_df = self._display_leaderboard(trained_models_info=trained_models_df)
|
|
410
|
+
"""
|
|
411
|
+
# Initialize sorted_model_df
|
|
412
|
+
if trained_models_info.empty:
|
|
413
|
+
self._display_msg(msg="No models were trained successfully. Please check error logs for details.",
|
|
414
|
+
progress_bar=self.progress_bar,
|
|
415
|
+
show_data=True)
|
|
416
|
+
sorted_model_df = trained_models_info.copy()
|
|
417
|
+
else:
|
|
418
|
+
# Sort based on task type and clustering
|
|
419
|
+
if not self.cluster:
|
|
420
|
+
if self.task_type != "Regression":
|
|
421
|
+
sorted_model_df = trained_models_info.sort_values(by=['MICRO-F1', 'WEIGHTED-F1'],
|
|
422
|
+
ascending=[False, False]).reset_index(drop=True)
|
|
423
|
+
else:
|
|
424
|
+
sorted_model_df = trained_models_info.sort_values(by='R2',
|
|
425
|
+
ascending=False).reset_index(drop=True)
|
|
426
|
+
else:
|
|
427
|
+
sorted_model_df = trained_models_info.sort_values(by=['SILHOUETTE', 'CALINSKI', 'DAVIES'],
|
|
428
|
+
ascending=[False, False, True]).reset_index(drop=True)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
# Adding rank to leaderboard
|
|
432
|
+
sorted_model_df.insert(0, 'RANK', sorted_model_df.index + 1)
|
|
433
|
+
|
|
434
|
+
# Internal Data list for leaderboard
|
|
435
|
+
dp_lst = ["model-obj", "DATA_TABLE", "RESULT_TABLE", "PARAMETERS"]
|
|
436
|
+
|
|
437
|
+
# Excluding the model object and model name from leaderboard
|
|
438
|
+
leaderboard = sorted_model_df.drop(columns=[col for col in dp_lst if col in sorted_model_df.columns])
|
|
439
|
+
|
|
440
|
+
# Filter rows based on max_models only for non-empty DataFrame
|
|
441
|
+
if not trained_models_info.empty and self.max_models is not None:
|
|
442
|
+
leaderboard = leaderboard[leaderboard["RANK"] <= self.max_models]
|
|
443
|
+
|
|
444
|
+
self._display_msg(msg="Leaderboard",
|
|
445
|
+
progress_bar=self.progress_bar,
|
|
446
|
+
data=leaderboard,
|
|
447
|
+
show_data=True)
|
|
448
|
+
|
|
449
|
+
return leaderboard, sorted_model_df
|
|
450
|
+
|
|
451
|
+
def _update_hyperparameters(self,
|
|
452
|
+
existing_params,
|
|
453
|
+
new_params):
|
|
454
|
+
"""
|
|
455
|
+
DESCRIPTION:
|
|
456
|
+
Function to update customized hyperparameters by performing addition or replacement
|
|
457
|
+
based on user input.
|
|
458
|
+
|
|
459
|
+
PARAMETERS:
|
|
460
|
+
existing_params:
|
|
461
|
+
Required Argument.
|
|
462
|
+
Specifies the existing generated hyperparameters for specific model.
|
|
463
|
+
Types: dict
|
|
464
|
+
|
|
465
|
+
new_params:
|
|
466
|
+
Required Argument.
|
|
467
|
+
Specifies the newly passed hyperparameters from user input.
|
|
468
|
+
Types: dict
|
|
469
|
+
|
|
470
|
+
RETURNS:
|
|
471
|
+
Updated dictionary containing hyperparameters for specific model.
|
|
472
|
+
|
|
473
|
+
RAISES:
|
|
474
|
+
None
|
|
475
|
+
|
|
476
|
+
EXAMPLES:
|
|
477
|
+
>>> params = self._update_hyperparameters(existing_params={'max_depth': (5, 7)}, new_params={'max_depth': {'Method': 'ADD', 'Value': [10, 15]}})
|
|
478
|
+
"""
|
|
479
|
+
# Iterating over new hyperparameters and performing required operation
|
|
480
|
+
# based on passed method ADD or REPLACE
|
|
481
|
+
if self.cluster:
|
|
482
|
+
# Clustering: use param_grid
|
|
483
|
+
param_grid = existing_params.get("param_grid", {})
|
|
484
|
+
for feature, param_list in new_params.items():
|
|
485
|
+
if feature in param_grid:
|
|
486
|
+
if param_list["Method"] == "ADD":
|
|
487
|
+
param_grid[feature] = list(param_grid[feature])
|
|
488
|
+
param_grid[feature].extend(param_list["Value"])
|
|
489
|
+
param_grid[feature] = tuple(set(param_grid[feature]))
|
|
490
|
+
elif param_list["Method"] == "REPLACE":
|
|
491
|
+
param_grid[feature] = tuple(param_list["Value"])
|
|
492
|
+
else:
|
|
493
|
+
self._display_msg(inline_msg="Passed method is not valid.")
|
|
494
|
+
else:
|
|
495
|
+
param_grid[feature] = tuple(param_list["Value"])
|
|
496
|
+
existing_params["param_grid"] = param_grid
|
|
497
|
+
|
|
498
|
+
else:
|
|
499
|
+
for feature, param_list in new_params.items():
|
|
500
|
+
if feature in existing_params.keys():
|
|
501
|
+
if param_list["Method"] == "ADD":
|
|
502
|
+
# Extending existing list
|
|
503
|
+
existing_params[feature] = list(existing_params[feature])
|
|
504
|
+
existing_params[feature].extend(param_list["Value"])
|
|
505
|
+
# Updating list with unique values.
|
|
506
|
+
existing_params[feature]=tuple(set(existing_params[feature]))
|
|
507
|
+
elif param_list["Method"] == "REPLACE":
|
|
508
|
+
# Replacing with entirely new value
|
|
509
|
+
existing_params[feature] = tuple(param_list["Value"])
|
|
510
|
+
else:
|
|
511
|
+
self._display_msg(inline_msg="Passed method is not valid.")
|
|
512
|
+
else:
|
|
513
|
+
self._display_msg(inline_msg="Passed model argument {} is not"
|
|
514
|
+
" available for model {}. Skipping it."
|
|
515
|
+
.format(feature,existing_params['name']))
|
|
516
|
+
continue
|
|
517
|
+
# Returning updated hyperparamter
|
|
518
|
+
return existing_params
|
|
519
|
+
|
|
520
|
+
def _custom_hyperparameters(self,
|
|
521
|
+
hyperparameters):
|
|
522
|
+
"""
|
|
523
|
+
DESCRIPTION:
|
|
524
|
+
Function to extract and update hyperaparameters from user input for model training.
|
|
525
|
+
|
|
526
|
+
PARAMETERS:
|
|
527
|
+
hyperparameters:
|
|
528
|
+
Required Argument.
|
|
529
|
+
Specifies the existing generated hyperparameters for all models.
|
|
530
|
+
Types: list
|
|
531
|
+
|
|
532
|
+
RETURNS:
|
|
533
|
+
Updated list of dictionaries containing hyperparameterd for all models.
|
|
534
|
+
|
|
535
|
+
RAISES:
|
|
536
|
+
None
|
|
537
|
+
|
|
538
|
+
EXAMPLES:
|
|
539
|
+
>>> hyperparameters = self._custom_hyperparameters(hyperparameters=[{'name': 'xgboost', 'max_depth': (5, 7)}, {'name': 'svm', 'lambda1': (0.01, 0.1)}])
|
|
540
|
+
"""
|
|
541
|
+
self._display_msg(msg="Starting customized hyperparameter update ...",
|
|
542
|
+
progress_bar=self.progress_bar,
|
|
543
|
+
show_data=True)
|
|
544
|
+
|
|
545
|
+
# Fetching user input for performing hyperparameter tuning
|
|
546
|
+
hyperparameter_tuning_input = self.custom_data.get("HyperparameterTuningIndicator", False)
|
|
547
|
+
if hyperparameter_tuning_input:
|
|
548
|
+
# Extracting models and its corresponding hyperparameters details
|
|
549
|
+
model_hyperparameters = self.custom_data.get("HyperparameterTuningParam", None)
|
|
550
|
+
# Getting model index for mapping
|
|
551
|
+
model_index_param = self.model_mapping
|
|
552
|
+
# Checking hyperparameters passed by user and mapping them according to model
|
|
553
|
+
if model_hyperparameters:
|
|
554
|
+
for model_name, hyp_list in model_hyperparameters.items():
|
|
555
|
+
if model_name in list(model_index_param.keys()):
|
|
556
|
+
model_index = model_index_param[model_name]
|
|
557
|
+
else:
|
|
558
|
+
self._display_msg(inline_msg="Passed model {} is not available for training.".format(model_name))
|
|
559
|
+
continue
|
|
560
|
+
# Updating existing hyperparameters with customized hyperparameters as per user input
|
|
561
|
+
hyperparameters[model_index]=self._update_hyperparameters(hyperparameters[model_index],hyp_list)
|
|
562
|
+
# Displaying it after update
|
|
563
|
+
self._display_msg(inline_msg="Completed customized hyperparameter update.",
|
|
564
|
+
progress_bar=self.progress_bar)
|
|
565
|
+
else:
|
|
566
|
+
self._display_msg(inline_msg="No information provided for custom hyperparameters. AutoML will proceed with default values.",
|
|
567
|
+
progress_bar=self.progress_bar)
|
|
568
|
+
else:
|
|
569
|
+
self._display_msg(inline_msg="Skipping customized hyperparameter tuning",
|
|
570
|
+
progress_bar=self.progress_bar)
|
|
571
|
+
# Retunring updated hyperparameters for all models
|
|
572
|
+
return hyperparameters
|
|
573
|
+
|
|
574
|
+
# Hyperparameter generation for XGBoost or Decision Forest
|
|
575
|
+
def _get_tree_model_hyperparameters(self,
|
|
576
|
+
num_rows,
|
|
577
|
+
num_cols,
|
|
578
|
+
model_name):
|
|
579
|
+
"""
|
|
580
|
+
DESCRIPTION:
|
|
581
|
+
Internal function to generate hyperparameters for tree based model i.e., XGBoost or Decision Forest.
|
|
582
|
+
|
|
583
|
+
PARAMETERS:
|
|
584
|
+
num_rows:
|
|
585
|
+
Required Argument.
|
|
586
|
+
Specifies the number of rows in dataset.
|
|
587
|
+
Types: int
|
|
588
|
+
|
|
589
|
+
num_cols:
|
|
590
|
+
Required Argument.
|
|
591
|
+
Specifies the number of columns in dataset.
|
|
592
|
+
Types: int
|
|
593
|
+
|
|
594
|
+
model_name:
|
|
595
|
+
Required Argument.
|
|
596
|
+
Specifies which linear model is getting used for generating hyperparameters.
|
|
597
|
+
Types: Str
|
|
598
|
+
|
|
599
|
+
RETURNS:
|
|
600
|
+
dict containing, hyperparameters for XGBoost or Decision Forest.
|
|
601
|
+
|
|
602
|
+
RAISES:
|
|
603
|
+
None
|
|
604
|
+
|
|
605
|
+
EXAMPLES:
|
|
606
|
+
>>> params = self._get_tree_model_hyperparameters(num_rows=5000, num_cols=15, model_name="xgboost")
|
|
607
|
+
"""
|
|
608
|
+
# Initializing hyperparameters based on default value
|
|
609
|
+
min_impurity = [0.0]
|
|
610
|
+
shrinkage_factor = [0.5]
|
|
611
|
+
max_depth = [5]
|
|
612
|
+
min_node_size = [1]
|
|
613
|
+
iter_num = [10]
|
|
614
|
+
num_trees = [-1]
|
|
615
|
+
lambda1 = [1.0] # Initialize lambda1 with default value
|
|
616
|
+
num_boosted_trees = [-1] # Initialize num_boosted_trees with default value
|
|
617
|
+
|
|
618
|
+
# Extending values for hyperparameters based on dataset size, i.e., number of rows and columns
|
|
619
|
+
if num_rows < 1000 and num_cols < 10:
|
|
620
|
+
min_impurity.extend([0.1])
|
|
621
|
+
shrinkage_factor.extend([0.1, 0.2])
|
|
622
|
+
max_depth.extend([6, 7, 8])
|
|
623
|
+
min_node_size.extend([2])
|
|
624
|
+
iter_num.extend([20])
|
|
625
|
+
lambda1.extend([0.001, 0.01])
|
|
626
|
+
num_boosted_trees.extend([2, 5])
|
|
627
|
+
elif num_rows < 10000 and num_cols < 15:
|
|
628
|
+
min_impurity.extend([0.1, 0.2])
|
|
629
|
+
shrinkage_factor.extend([0.1, 0.3])
|
|
630
|
+
max_depth.extend([6, 8, 10])
|
|
631
|
+
min_node_size.extend([2, 3])
|
|
632
|
+
iter_num.extend([20, 30])
|
|
633
|
+
lambda1.extend([0.01, 0.1])
|
|
634
|
+
num_boosted_trees.extend([5, 10])
|
|
635
|
+
elif num_rows < 100000 and num_cols < 20:
|
|
636
|
+
min_impurity.extend([0.2, 0.3])
|
|
637
|
+
shrinkage_factor.extend([0.01, 0.1, 0.2])
|
|
638
|
+
max_depth.extend([4, 6, 7])
|
|
639
|
+
min_node_size.extend([3, 4])
|
|
640
|
+
iter_num.extend([30, 40])
|
|
641
|
+
lambda1.extend([0.1, 1.0])
|
|
642
|
+
num_boosted_trees.extend([10, 20])
|
|
643
|
+
else:
|
|
644
|
+
min_impurity.extend([0.1, 0.2, 0.3])
|
|
645
|
+
shrinkage_factor.extend([0.01, 0.05, 0.1])
|
|
646
|
+
max_depth.extend([3, 4, 7, 8])
|
|
647
|
+
min_node_size.extend([2, 3, 4])
|
|
648
|
+
iter_num.extend([20, 30, 40])
|
|
649
|
+
lambda1.extend([1.0, 10.0, 100.0])
|
|
650
|
+
num_boosted_trees.extend([20, 50, 100])
|
|
651
|
+
|
|
652
|
+
# Hyperparameters for XGBoost model
|
|
653
|
+
xgb_params = {
|
|
654
|
+
'response_column': self.target_column,
|
|
655
|
+
'name':'xgboost',
|
|
656
|
+
'model_type': 'Regression',
|
|
657
|
+
'column_sampling': (1, .6),
|
|
658
|
+
'min_impurity': tuple(min_impurity),
|
|
659
|
+
'lambda1': tuple(lambda1),
|
|
660
|
+
'shrinkage_factor': tuple(shrinkage_factor),
|
|
661
|
+
'max_depth': tuple(max_depth),
|
|
662
|
+
'min_node_size': tuple(min_node_size),
|
|
663
|
+
'iter_num': tuple(iter_num),
|
|
664
|
+
'num_boosted_trees': tuple(num_boosted_trees),
|
|
665
|
+
'seed': self.seed
|
|
666
|
+
}
|
|
667
|
+
# Hyperparameters for Decision Forest model
|
|
668
|
+
df_params = {
|
|
669
|
+
'response_column': self.target_column,
|
|
670
|
+
'name': 'decision_forest',
|
|
671
|
+
'tree_type': 'Regression',
|
|
672
|
+
'min_impurity': tuple(min_impurity),
|
|
673
|
+
'max_depth': tuple(max_depth),
|
|
674
|
+
'min_node_size': tuple(min_node_size),
|
|
675
|
+
'num_trees': tuple(num_trees),
|
|
676
|
+
'seed': self.seed
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
# Updating model type in case of classification
|
|
680
|
+
if self.task_type == "Classification":
|
|
681
|
+
xgb_params['model_type'] = 'Classification'
|
|
682
|
+
df_params['tree_type'] = 'Classification'
|
|
683
|
+
|
|
684
|
+
# Returning hyperparameters based on passed model
|
|
685
|
+
if model_name == 'xgboost':
|
|
686
|
+
return xgb_params
|
|
687
|
+
elif model_name == 'decision_forest':
|
|
688
|
+
return df_params
|
|
689
|
+
else:
|
|
690
|
+
return None
|
|
691
|
+
|
|
692
|
+
# Hyperparameter generation for KNN
|
|
693
|
+
def _get_knn_hyperparameters(self,
|
|
694
|
+
num_rows=None,
|
|
695
|
+
num_cols=None):
|
|
696
|
+
"""
|
|
697
|
+
DESCRIPTION:
|
|
698
|
+
Internal function to generate hyperparameters for KNN.
|
|
699
|
+
|
|
700
|
+
PARAMETERS:
|
|
701
|
+
num_rows
|
|
702
|
+
Required Argument.
|
|
703
|
+
Specifies the number of rows in dataset.
|
|
704
|
+
Types: int
|
|
705
|
+
|
|
706
|
+
num_cols:
|
|
707
|
+
Required Argument.
|
|
708
|
+
Specifies the number of columns in dataset.
|
|
709
|
+
Types: int
|
|
710
|
+
|
|
711
|
+
RETURNS:
|
|
712
|
+
dict containing, hyperparameters for KNN.
|
|
713
|
+
|
|
714
|
+
RAISES:
|
|
715
|
+
None
|
|
716
|
+
|
|
717
|
+
EXAMPLES:
|
|
718
|
+
>>> params = self._get_knn_hyperparameters(num_rows=2000, num_cols=12)
|
|
719
|
+
"""
|
|
720
|
+
params = {
|
|
721
|
+
'response_column': self.target_column,
|
|
722
|
+
'name': 'knn',
|
|
723
|
+
'model_type': 'Regression',
|
|
724
|
+
'k': (3, 5, 6, 8, 10, 12),
|
|
725
|
+
"id_column": self.id_column,
|
|
726
|
+
"voting_weight": 1.0
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
if self.task_type == "Classification":
|
|
730
|
+
params['model_type'] = 'Classification'
|
|
731
|
+
|
|
732
|
+
return params
|
|
733
|
+
|
|
734
|
+
# Hyperparameter generation for SVM/GLM
|
|
735
|
+
def _get_linear_model_hyperparameters(self,
|
|
736
|
+
num_rows,
|
|
737
|
+
num_cols,
|
|
738
|
+
model_name):
|
|
739
|
+
"""
|
|
740
|
+
DESCRIPTION:
|
|
741
|
+
Internal function to generate hyperparameters for linear models i.e., SVM or GLM.
|
|
742
|
+
|
|
743
|
+
PARAMETERS:
|
|
744
|
+
num_rows:
|
|
745
|
+
Required Argument.
|
|
746
|
+
Specifies the number of rows in dataset.
|
|
747
|
+
Types: int
|
|
748
|
+
|
|
749
|
+
num_cols:
|
|
750
|
+
Required Argument.
|
|
751
|
+
Specifies the number of columns in dataset.
|
|
752
|
+
Types: int
|
|
753
|
+
|
|
754
|
+
model_name:
|
|
755
|
+
Required Argument.
|
|
756
|
+
Specifies which tree model is getting used for generating hyperparameters.
|
|
757
|
+
Types: Str
|
|
758
|
+
|
|
759
|
+
RETURNS:
|
|
760
|
+
dict containing, hyperparameters for SVM or GLM.
|
|
761
|
+
|
|
762
|
+
RAISES:
|
|
763
|
+
None
|
|
764
|
+
|
|
765
|
+
EXAMPLES:
|
|
766
|
+
>>> params = self._get_linear_model_hyperparameters(num_rows=10000, num_cols=25, model_name="glm")
|
|
767
|
+
"""
|
|
768
|
+
# Initializing hyperparameters based on default value
|
|
769
|
+
iter_max = [300]
|
|
770
|
+
batch_size = [10]
|
|
771
|
+
|
|
772
|
+
# Extending values for hyperparameters based on dataset size i.e., number of rows and columns
|
|
773
|
+
if num_rows < 1000 and num_cols < 10:
|
|
774
|
+
iter_max.extend([100, 200])
|
|
775
|
+
batch_size.extend([20, 40, 50])
|
|
776
|
+
elif num_rows < 10000 and num_cols < 15:
|
|
777
|
+
iter_max.extend([200, 400])
|
|
778
|
+
batch_size.extend([50, 60, 80])
|
|
779
|
+
elif num_rows < 100000 and num_cols < 20:
|
|
780
|
+
iter_max.extend([400])
|
|
781
|
+
batch_size.extend([100, 150])
|
|
782
|
+
else:
|
|
783
|
+
iter_max.extend([200, 400, 500])
|
|
784
|
+
batch_size.extend([80, 100, 150])
|
|
785
|
+
|
|
786
|
+
# Hyperparameters for SVM model
|
|
787
|
+
svm_params = {
|
|
788
|
+
'response_column': self.target_column,
|
|
789
|
+
'name':'svm',
|
|
790
|
+
'model_type':'regression',
|
|
791
|
+
'lambda1':(0.001, 0.02, 0.1),
|
|
792
|
+
'alpha':(.15, .85),
|
|
793
|
+
'tolerance':(0.001, 0.01),
|
|
794
|
+
'learning_rate':('Invtime','Adaptive','constant'),
|
|
795
|
+
'initial_eta' : (0.05, 0.1),
|
|
796
|
+
'momentum':(0.65, 0.8, 0.95),
|
|
797
|
+
'nesterov': True,
|
|
798
|
+
'intercept': True,
|
|
799
|
+
'iter_num_no_change':(5, 10, 50),
|
|
800
|
+
'local_sgd_iterations ': (10, 20),
|
|
801
|
+
'iter_max' : tuple(iter_max),
|
|
802
|
+
'batch_size' : tuple(batch_size)
|
|
803
|
+
}
|
|
804
|
+
# Hyperparameters for GLM model
|
|
805
|
+
glm_params={
|
|
806
|
+
'response_column': self.target_column,
|
|
807
|
+
'name': 'glm',
|
|
808
|
+
'family': 'GAUSSIAN',
|
|
809
|
+
'lambda1':(0.001, 0.02, 0.1),
|
|
810
|
+
'alpha': (0.15, 0.85),
|
|
811
|
+
'learning_rate': ('invtime', 'constant', 'adaptive'),
|
|
812
|
+
'initial_eta': (0.05, 0.1),
|
|
813
|
+
'momentum': (0.65, 0.8, 0.95),
|
|
814
|
+
'iter_num_no_change':(5, 10, 50),
|
|
815
|
+
'iter_max' : tuple(iter_max),
|
|
816
|
+
'batch_size' : tuple(batch_size)
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
# Updating model type in case of classification
|
|
820
|
+
if self.task_type == "Classification":
|
|
821
|
+
svm_params['model_type'] = 'Classification'
|
|
822
|
+
svm_params['learning_rate'] = 'OPTIMAL'
|
|
823
|
+
glm_params['family'] = 'BINOMIAL'
|
|
824
|
+
glm_params['learning_rate'] = 'OPTIMAL'
|
|
825
|
+
|
|
826
|
+
# Returning hyperparameters based on passed model
|
|
827
|
+
if model_name == 'svm':
|
|
828
|
+
return svm_params
|
|
829
|
+
elif model_name == 'glm':
|
|
830
|
+
return glm_params
|
|
831
|
+
else:
|
|
832
|
+
return None
|
|
833
|
+
|
|
834
|
+
def _get_kmeans_hyperparameters(self):
|
|
835
|
+
"""
|
|
836
|
+
DESCRIPTION:
|
|
837
|
+
Generates hyperparameters for kmeans clustering.
|
|
838
|
+
|
|
839
|
+
PARAMETERS:
|
|
840
|
+
None
|
|
841
|
+
|
|
842
|
+
RETURNS:
|
|
843
|
+
dict containing hyperparameters for kmeans.
|
|
844
|
+
|
|
845
|
+
RAISES:
|
|
846
|
+
None
|
|
847
|
+
|
|
848
|
+
EXAMPLES:
|
|
849
|
+
>>> params = self._get_kmeans_hyperparameters()
|
|
850
|
+
"""
|
|
851
|
+
params = {
|
|
852
|
+
"name": "kmeans",
|
|
853
|
+
"param_grid": {
|
|
854
|
+
'n_clusters': (2,3,4,5,6,7,8,9,10),
|
|
855
|
+
'init': ('k-means++', 'random'),
|
|
856
|
+
'n_init': (5, 10),
|
|
857
|
+
'max_iter': (100, 200),
|
|
858
|
+
'tol': (0.001, 0.01),
|
|
859
|
+
'algorithm': ('lloyd', 'elkan')
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
return params
|
|
864
|
+
|
|
865
|
+
def _get_gmm_hyperparameters(self):
|
|
866
|
+
"""
|
|
867
|
+
DESCRIPTION:
|
|
868
|
+
Generates hyperparameters for Gaussian Mixture Model (GMM).
|
|
869
|
+
|
|
870
|
+
PARAMETERS:
|
|
871
|
+
None
|
|
872
|
+
|
|
873
|
+
RETURNS:
|
|
874
|
+
dict containing hyperparameters for GMM.
|
|
875
|
+
|
|
876
|
+
RAISES:
|
|
877
|
+
None
|
|
878
|
+
|
|
879
|
+
EXAMPLES:
|
|
880
|
+
>>> params = self._get_gmm_hyperparameters()
|
|
881
|
+
"""
|
|
882
|
+
params = {
|
|
883
|
+
"name": "gaussianmixture",
|
|
884
|
+
"param_grid": {
|
|
885
|
+
"n_components": (2,3,4,5,6,7,8,9,10),
|
|
886
|
+
"covariance_type": ("full", "tied", "diag", "spherical"),
|
|
887
|
+
"max_iter": (100, 300)
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
return params
|
|
892
|
+
|
|
893
|
+
def _generate_parameter(self):
    """
    DESCRIPTION:
        Internal function to generate hyperparameters for ML models.
        It also narrows self.model_list down to the models supported for
        the current mode (supervised or clustering) and records the
        position of every model in self.model_mapping.

    PARAMETERS:
        None

    RETURNS:
        list containing, dict of hyperparameters for different ML models,
        in the same order as self.model_list.

    RAISES:
        ValueError, when no supported model is left to train.

    EXAMPLES:
        >>> parameters = self._generate_parameter()
    """
    # Dictionary for mapping model with index
    self.model_mapping = {}

    if self.cluster:
        # Clustering generators take no arguments.
        generators = {
            'kmeans': self._get_kmeans_hyperparameters,
            'gaussianmixture': self._get_gmm_hyperparameters,
        }
        allowed = AutoMLConstants.CLUSTERING_MODELS.value
        self.model_list = [name for name in self.model_list if name in allowed]
        if not self.model_list:
            raise ValueError("No model is selected for training.")

        hyperparameters = []
        for position, name in enumerate(self.model_list):
            self.model_mapping[name] = position
            hyperparameters.append(generators[name]())
        return hyperparameters

    # Supervised generators are sized from the number of rows and columns.
    row_count = self.data.shape[0]
    col_count = self.data.shape[1]

    generators = {
        'decision_forest': self._get_tree_model_hyperparameters,
        'xgboost': self._get_tree_model_hyperparameters,
        'knn': self._get_knn_hyperparameters,
        'glm': self._get_linear_model_hyperparameters,
        'svm': self._get_linear_model_hyperparameters,
    }
    allowed = AutoMLConstants.SUPERVISED_MODELS.value
    self.model_list = [name for name in self.model_list if name in allowed]
    if not self.model_list:
        raise ValueError("No model is selected for training.")

    hyperparameters = []
    for position, name in enumerate(self.model_list):
        self.model_mapping[name] = position
        # The KNN generator is the only one that is not told the model name,
        # because the tree and linear generators each serve two models.
        trailing_args = () if name == 'knn' else (name,)
        hyperparameters.append(generators[name](row_count, col_count, *trailing_args))
    return hyperparameters
|
|
961
|
+
|
|
962
|
+
def distribute_max_models(self):
    """
    DESCRIPTION:
        Internal function to distribute max_models across available model functions.
        max_models is first split as evenly as possible over the supported
        models in self.model_list (earlier models receive the remainder).
        Each non-zero share is then divided by the number of training
        datasets, rounding up, because every model is trained once per dataset.

    PARAMETERS:
        None

    RETURNS:
        tuple of (dict, list):
            * dict mapping model name to the number of models to train
              per training dataset (0 for models that received no share).
            * list of model names that received no share and should be removed.

    RAISES:
        ValueError, when self.model_list contains no supported model.

    EXAMPLES:
        >>> model_distribution, models_to_remove = self.distribute_max_models()
    """
    if self.cluster:
        models = [model for model in self.model_list if model in AutoMLConstants.CLUSTERING_MODELS.value]
    else:
        models = [model for model in self.model_list if model in AutoMLConstants.SUPERVISED_MODELS.value]

    # Getting total number of models
    model_count = len(models)
    if model_count == 0:
        # Without this guard the division below fails with an unhelpful
        # ZeroDivisionError; report the real problem instead.
        raise ValueError("No model is selected for training.")

    # Evenly distributing max_models across models; the remainder is handed
    # out one by one starting from the first model in the list.
    base_assign, remaining_model_count = divmod(self.max_models, model_count)
    distribution = [base_assign] * model_count
    for i in range(remaining_model_count):
        distribution[i] += 1

    # Creating dictionary for model distribution
    model_distribution = dict(zip(models, distribution))

    # Number of training datasets per model:
    # cluster (2), with lasso (3), without lasso (2)
    fs_count = 2 if self.cluster else (3 if self.enable_lasso else 2)

    # Models with a zero share are reported for removal; every other share
    # is scaled down to a per-dataset count.
    models_to_remove = []
    for model in models:
        initial_count = model_distribution[model]
        if initial_count == 0:
            models_to_remove.append(model)
        else:
            model_distribution[model] = math.ceil(initial_count / fs_count)

    return model_distribution, models_to_remove
|
|
1015
|
+
|
|
1016
|
+
def _parallel_training(self, parameters):
    """
    DESCRIPTION:
        Internal function that runs hyperparameter tuning for each
        requested ML model, one model after another, and gathers the
        information of all trained models.

    PARAMETERS:
        parameters:
            Required Argument.
            Specifies the hyperparameters for ML models. Only the first
            five entries are used.
            Types: list of dict

    RETURNS:
        Pandas DataFrame containing, trained models information.
        The DataFrame is empty (columns only) when no model was trained.

    RAISES:
        None

    EXAMPLES:
        >>> models_df = self._parallel_training(parameters=[{'name': 'xgboost', 'max_depth': (5, 7)}, {'name': 'svm', 'lambda1': (0.01, 0.1)}])
    """
    self.model_id_counters = {}
    # At most five hyperparameter sets are tuned.
    candidates = parameters[:5]
    self._display_msg(msg="Performing hyperparameter tuning ...", progress_bar=self.progress_bar)

    # Selecting the prepared training datasets. Lasso data is included
    # only if lasso selection is enabled.
    if self.cluster:
        data_types = ['pca', 'non_pca']
    elif self.enable_lasso:
        data_types = ['lasso', 'rfe', 'pca']
    else:
        data_types = ['rfe', 'pca']
    training_datas = tuple(DataFrame(self.data_mapping[f'{kind}_train']) for kind in data_types)

    # Distinct target values, as strings, taken from the first dataset.
    if self.task_type == "Classification" and not self.cluster:
        distinct_targets = training_datas[0].get(self.target_column).drop_duplicate()
        self.output_response = [str(value) for value in distinct_targets.get_values().flatten().tolist()]

    # Default early-stopping setup when no stopping metric was provided.
    if self.stopping_metric is None:
        if self.cluster:
            default_metric = 'SILHOUETTE'
        elif self.is_classification_type():
            default_metric = 'MICRO-F1'
        else:
            default_metric = 'R2'
        self.stopping_tolerance = 1.0
        self.stopping_metric = default_metric

    # The runtime budget is split evenly across the candidate models.
    if self.max_runtime_secs is not None:
        self.max_runtime_secs = self.max_runtime_secs / len(candidates)

    if self.max_models is not None:
        # Getting model distribution and models to remove
        self.max_models_distribution, dropped_models = self.distribute_max_models()
        for dropped in dropped_models:
            candidates = [entry for entry in candidates if entry['name'] != dropped]
            # Updating progress bar as we are removing model
            self.progress_bar.update()

    if self.is_classification_type() and not self.cluster:
        self.startify_col = self.target_column

    # Tuning every remaining model; models that yield nothing are skipped.
    outcomes = (self._hyperparameter_tunning(entry, training_datas) for entry in candidates)
    trained_models = [outcome for outcome in outcomes if outcome is not None]

    if trained_models:
        return pd.concat(trained_models, ignore_index=True)

    # Create empty DataFrame with exact same columns as successful case
    if self.cluster:
        columns = AutoMLConstants.CLUSTERING_COLUMNS.value
    elif self.task_type == "Classification":
        columns = AutoMLConstants.CLASSIFICATION_COLUMNS.value
    elif self.task_type == "Regression":
        columns = AutoMLConstants.REGRESSION_COLUMNS.value

    return pd.DataFrame(columns=columns)
|
|
1099
|
+
|
|
1100
|
+
def _hyperparameter_tunning(self,
                            model_param,
                            train_data):
    """
    DESCRIPTION:
        Internal function performs hyperparameter tuning on
        ML models for regression/classification/clustering problems.
        RandomSearch is used when max_models is set, otherwise GridSearch.
        The search object is stored in self.data_transform_dict under
        the key "hpt_object_<model name>".

    PARAMETERS:
        model_param:
            Required Argument.
            Specifies the hyperparameters of the model to tune. Must
            contain the key 'name'; for clustering it must also contain
            the key 'param_grid'. The dict is updated in place with the
            additional training arguments.
            Types: dict

        train_data:
            Required Argument.
            Specifies the training datasets. The dataset at position k
            is reported by the search as DATA_ID "DF_k".
            Types: tuple of Teradataml DataFrame

    RETURNS:
        pandas DataFrame containing, trained models information,
        or None when no model finished with STATUS 'PASS'.

    RAISES:
        ValueError, when clustering is requested for an unsupported model.

    EXAMPLES:
        >>> models_df = self._hyperparameter_tunning(model_param={'name': 'xgboost', 'max_depth': (5, 7, 10)}, train_data=(lasso_train_data, rfe_train_data, pca_train_data))
    """
    # Passing verbose value based on user input
    if self.verbose > 0:
        # Blank out the current console line before the search prints.
        print(" " *200, end='\r', flush=True)
        verbose = 1
    else:
        verbose = 0

    # Hyperparameter tuning
    # Parallel run opens multiple connections for parallel execution,
    # but volatile tables are not accessible across different sessions.
    # Therefore, execution is performed sequentially by setting run_parallel=False.
    run_parallel = configure.temp_object_type != TeradataConstants.TERADATA_VOLATILE_TABLE

    if not self.cluster:
        # Mapping model names to functions
        model_to_func = {"glm": GLM, "svm": SVM,
                         "xgboost": XGBoost, "decision_forest": DecisionForest, "knn": KNN}

        # Setting eval_params for hpt.
        eval_params = _ModelTraining._eval_params_generation(model_param['name'],
                                                             self.target_column,
                                                             self.task_type,
                                                             self.id_column)

        # Input columns for model
        model_param['input_columns'] = self.features

        # Setting persist for model
        model_param['persist'] = self.persist

        self._display_msg(msg=f"Model training for {model_param['name']}",
                          progress_bar=self.progress_bar,
                          show_data=True)

        # As we are using entire data for HPT training. So,
        # passing prepared training data as test_data for KNN.
        if model_param['name'] == 'knn':
            model_param['test_data'] = train_data

            # NOTE(review): the original indentation is not recoverable from
            # the rendered diff. These settings are assumed to belong to the
            # KNN branch, since KNN is fitted without eval_params (which
            # carry 'output_prob' for the other models) - confirm.
            if self.task_type == "Classification":
                model_param['output_prob'] = True
                model_param['output_responses'] = self.output_response

        # Using RandomSearch for hyperparameter tuning when max_models is given.
        # Otherwise, using GridSearch for hyperparameter tuning.
        if self.max_models is not None:
            # Setting max_models for RandomSearch based on model name
            model_param['max_models'] = self.max_models_distribution[model_param['name']]
            # Defining RandomSearch with ML model based on Name, and max_models
            _obj = RandomSearch(func=model_to_func[model_param['name']],
                                params=model_param,
                                n_iter=model_param['max_models'])
        else:
            # Defining Gridsearch with ML model based on Name
            _obj = GridSearch(func=model_to_func[model_param['name']],
                              params=model_param)

        common_params = {
            "data": train_data,
            "evaluation_metric": self.stopping_metric,
            "early_stop": self.stopping_tolerance,
            "run_parallel": run_parallel,
            "sample_seed": self.seed,
            "sample_id_column": self.id_column,
            "discard_invalid_column_params": True,
            "stratify_column": self.startify_col,
            "verbose": verbose,
            "max_time": self.max_runtime_secs,
            "suppress_refer_msg": True
        }

        # KNN is fitted without the evaluation parameters.
        if model_param['name'] == 'knn':
            _obj.fit(**common_params)
        else:
            _obj.fit(**common_params, **eval_params)

        # Store the object in data_transform_dict
        self.data_transform_dict[f"hpt_object_{model_param['name']}"] = _obj

        # Getting all passed models
        model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
                                            on='MODEL_ID', how='inner')
        if not model_info.empty:
            # Creating mapping data ID to feature selection method
            if self.enable_lasso:
                data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
                                        "DF_1": ('rfe', train_data[1]._table_name),
                                        "DF_2": ('pca', train_data[2]._table_name)}
            else:
                data_id_to_table_map = {"DF_0": ('rfe', train_data[0]._table_name),
                                        "DF_1": ('pca', train_data[1]._table_name)}

            # Updating model stats with feature selection method and result table
            for index, row in model_info.iterrows():
                method, table_name = data_id_to_table_map[row['DATA_ID']]
                trained_model = _obj.get_model(row['MODEL_ID'])
                model_info.loc[index, 'FEATURE_SELECTION'] = method
                model_info.loc[index, 'DATA_TABLE'] = table_name
                model_info.loc[index, 'RESULT_TABLE'] = trained_model.result._table_name
                model_info.loc[index, 'model-obj'] = trained_model

            # Dropping column 'DATA_ID'
            model_info.drop(['DATA_ID'], axis=1, inplace=True)

            # Moving 'FEATURE_SELECTION' to the second position.
            model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))

            if not self.is_classification_type():
                # Calculating Adjusted-R2 for regression
                # Getting size and feature count for each feature selection method
                methods = (['lasso', 'rfe', 'pca'] if self.enable_lasso else ['rfe', 'pca'])
                size_map = {method : df.select(self.id_column).size for method, df in zip(methods, train_data)}
                # Two columns of every dataset are not counted as features.
                feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
                model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
                    1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
                         (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)

            self._display_msg(msg="-"*100,
                              progress_bar=self.progress_bar,
                              show_data=True)
            self.progress_bar.update()

            return model_info
        # Returning None, if no model is passed
        return None
    else:
        from teradataml import td_sklearn as skl

        model_name = model_param['name']

        self._display_msg(msg=f"Model training for {model_name}",
                          progress_bar=self.progress_bar, show_data=True)

        if model_name == "kmeans":
            model_func = skl.KMeans()
        elif model_name == "gaussianmixture":
            model_func = skl.GaussianMixture()
        else:
            raise ValueError(f"Unsupported model: {model_name}")

        model_param["input_columns"] = self.features
        model_param["persist"] = self.persist

        # Only the 'param_grid' entry is handed to the search object.
        if self.max_models is not None:
            model_param['max_models'] = self.max_models_distribution[model_name]

            search_obj = RandomSearch(func=model_func,
                                      params=model_param['param_grid'],
                                      n_iter=model_param['max_models'])
        else:
            search_obj = GridSearch(func=model_func, params=model_param['param_grid'])

        search_obj.fit(data=train_data, evaluation_metric=self.stopping_metric,
                       early_stop=self.stopping_tolerance, run_parallel=run_parallel,
                       sample_seed=self.seed, verbose=verbose, max_time=self.max_runtime_secs)

        # Store the object in data_transform_dict
        self.data_transform_dict[f"hpt_object_{model_name}"] = search_obj

        model_df = search_obj.models[search_obj.models["STATUS"] == "PASS"]
        if model_df.empty:
            print("No models passed. Exiting.")
            self.progress_bar.update()
            return None

        model_stats = search_obj.model_stats
        model_info = model_stats.merge(model_df[['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
                                       on="MODEL_ID", how="inner")

        if not model_info.empty:
            # Creating mapping data ID to feature selection method.
            # The caller passes the datasets in ('pca', 'non_pca') order and
            # "DF_k" refers to train_data[k], so each label must be paired
            # with the table at the same position. The previous mapping
            # paired 'pca' with train_data[1] and 'non_pca' with train_data[0].
            data_id_to_table_map = {"DF_0": ('pca', train_data[0]._table_name),
                                    "DF_1": ('non_pca', train_data[1]._table_name)}

            # Updating model stats with feature selection method and data table
            for index, row in model_info.iterrows():
                method, table_name = data_id_to_table_map[row['DATA_ID']]
                model_info.loc[index, 'FEATURE_SELECTION'] = method
                model_info.loc[index, 'DATA_TABLE'] = table_name
                model_info.loc[index, 'model-obj'] = search_obj.get_model(row['MODEL_ID'])

            # Dropping column 'DATA_ID'
            model_info.drop(['DATA_ID'], axis=1, inplace=True)

            # Moving 'FEATURE_SELECTION' to the second position.
            model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))

            self._display_msg(msg="-"*100,
                              progress_bar=self.progress_bar,
                              show_data=True)
            self.progress_bar.update()

            return model_info

        return None
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
@staticmethod
|
|
1328
|
+
def _eval_params_generation(ml_name,
|
|
1329
|
+
target_column,
|
|
1330
|
+
task_type,
|
|
1331
|
+
id_column):
|
|
1332
|
+
"""
|
|
1333
|
+
DESCRIPTION:
|
|
1334
|
+
Internal function generates the eval_params for
|
|
1335
|
+
different ML models.
|
|
1336
|
+
|
|
1337
|
+
PARAMETERS:
|
|
1338
|
+
ml_name
|
|
1339
|
+
Required Argument.
|
|
1340
|
+
Specifies the ML name for eval_params generation.
|
|
1341
|
+
Types: str
|
|
1342
|
+
|
|
1343
|
+
target_column
|
|
1344
|
+
Required Argument.
|
|
1345
|
+
Specifies the target column.
|
|
1346
|
+
Types: str
|
|
1347
|
+
|
|
1348
|
+
task_type:
|
|
1349
|
+
Required Argument.
|
|
1350
|
+
Specifies the task type for AutoML, whether to apply regresion
|
|
1351
|
+
or classification on the provived dataset.
|
|
1352
|
+
Permitted Values: "Regression", "Classification"
|
|
1353
|
+
Types: str
|
|
1354
|
+
|
|
1355
|
+
id_column:
|
|
1356
|
+
Required Argument.
|
|
1357
|
+
Specifies the ID column for eval_params generation.
|
|
1358
|
+
Types: str
|
|
1359
|
+
|
|
1360
|
+
RETURNS:
|
|
1361
|
+
dict containing, eval_params for ML model.
|
|
1362
|
+
|
|
1363
|
+
RAISES:
|
|
1364
|
+
None
|
|
1365
|
+
|
|
1366
|
+
EXAMPLES:
|
|
1367
|
+
>>> _ModelTraining._eval_params_generation(ml_name="decision_forest",
|
|
1368
|
+
... target_column="target_class",
|
|
1369
|
+
... task_type="Classification",
|
|
1370
|
+
... id_column="id")
|
|
1371
|
+
"""
|
|
1372
|
+
# Setting the eval_params
|
|
1373
|
+
eval_params = {"id_column": id_column,
|
|
1374
|
+
"accumulate": target_column}
|
|
1375
|
+
|
|
1376
|
+
model_type = {
|
|
1377
|
+
'xgboost': 'model_type',
|
|
1378
|
+
'glm': 'model_type',
|
|
1379
|
+
'decisionforest': 'tree_type',
|
|
1380
|
+
'svm': 'model_type',
|
|
1381
|
+
'knn': 'model_type'
|
|
1382
|
+
}
|
|
1383
|
+
|
|
1384
|
+
ml_name = ml_name.replace('_', '').lower()
|
|
1385
|
+
|
|
1386
|
+
# For Classification
|
|
1387
|
+
if task_type.lower() != "regression":
|
|
1388
|
+
eval_params[model_type[ml_name]] = 'Classification'
|
|
1389
|
+
eval_params['output_prob'] = True
|
|
1390
|
+
|
|
1391
|
+
if ml_name == 'xgboost':
|
|
1392
|
+
eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter','class_num', 'tree_order']
|
|
1393
|
+
|
|
1394
|
+
elif ml_name == 'glm':
|
|
1395
|
+
eval_params['family'] = 'BINOMIAL'
|
|
1396
|
+
|
|
1397
|
+
else:
|
|
1398
|
+
# For Regression
|
|
1399
|
+
eval_params[model_type[ml_name]] = 'Regression'
|
|
1400
|
+
|
|
1401
|
+
if ml_name == 'xgboost':
|
|
1402
|
+
eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter', 'tree_order']
|
|
1403
|
+
|
|
1404
|
+
elif ml_name == 'glm':
|
|
1405
|
+
eval_params['family'] = 'GAUSSIAN'
|
|
1406
|
+
|
|
1407
|
+
return eval_params
|