teradataml 20.0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +2762 -0
- teradataml/__init__.py +78 -0
- teradataml/_version.py +11 -0
- teradataml/analytics/Transformations.py +2996 -0
- teradataml/analytics/__init__.py +82 -0
- teradataml/analytics/analytic_function_executor.py +2416 -0
- teradataml/analytics/analytic_query_generator.py +1050 -0
- teradataml/analytics/byom/H2OPredict.py +514 -0
- teradataml/analytics/byom/PMMLPredict.py +437 -0
- teradataml/analytics/byom/__init__.py +16 -0
- teradataml/analytics/json_parser/__init__.py +133 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
- teradataml/analytics/json_parser/json_store.py +191 -0
- teradataml/analytics/json_parser/metadata.py +1666 -0
- teradataml/analytics/json_parser/utils.py +805 -0
- teradataml/analytics/meta_class.py +236 -0
- teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
- teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
- teradataml/analytics/sqle/__init__.py +128 -0
- teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
- teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
- teradataml/analytics/table_operator/__init__.py +11 -0
- teradataml/analytics/uaf/__init__.py +82 -0
- teradataml/analytics/utils.py +828 -0
- teradataml/analytics/valib.py +1617 -0
- teradataml/automl/__init__.py +5835 -0
- teradataml/automl/autodataprep/__init__.py +493 -0
- teradataml/automl/custom_json_utils.py +1625 -0
- teradataml/automl/data_preparation.py +1384 -0
- teradataml/automl/data_transformation.py +1254 -0
- teradataml/automl/feature_engineering.py +2273 -0
- teradataml/automl/feature_exploration.py +1873 -0
- teradataml/automl/model_evaluation.py +488 -0
- teradataml/automl/model_training.py +1407 -0
- teradataml/catalog/__init__.py +2 -0
- teradataml/catalog/byom.py +1759 -0
- teradataml/catalog/function_argument_mapper.py +859 -0
- teradataml/catalog/model_cataloging_utils.py +491 -0
- teradataml/clients/__init__.py +0 -0
- teradataml/clients/auth_client.py +137 -0
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/clients/pkce_client.py +481 -0
- teradataml/common/__init__.py +1 -0
- teradataml/common/aed_utils.py +2078 -0
- teradataml/common/bulk_exposed_utils.py +113 -0
- teradataml/common/constants.py +1669 -0
- teradataml/common/deprecations.py +166 -0
- teradataml/common/exceptions.py +147 -0
- teradataml/common/formula.py +743 -0
- teradataml/common/garbagecollector.py +666 -0
- teradataml/common/logger.py +1261 -0
- teradataml/common/messagecodes.py +518 -0
- teradataml/common/messages.py +262 -0
- teradataml/common/pylogger.py +67 -0
- teradataml/common/sqlbundle.py +764 -0
- teradataml/common/td_coltype_code_to_tdtype.py +48 -0
- teradataml/common/utils.py +3166 -0
- teradataml/common/warnings.py +36 -0
- teradataml/common/wrapper_utils.py +625 -0
- teradataml/config/__init__.py +0 -0
- teradataml/config/dummy_file1.cfg +5 -0
- teradataml/config/dummy_file2.cfg +3 -0
- teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
- teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
- teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
- teradataml/context/__init__.py +0 -0
- teradataml/context/aed_context.py +223 -0
- teradataml/context/context.py +1462 -0
- teradataml/data/A_loan.csv +19 -0
- teradataml/data/BINARY_REALS_LEFT.csv +11 -0
- teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
- teradataml/data/B_loan.csv +49 -0
- teradataml/data/BuoyData2.csv +17 -0
- teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
- teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
- teradataml/data/Convolve2RealsLeft.csv +5 -0
- teradataml/data/Convolve2RealsRight.csv +5 -0
- teradataml/data/Convolve2ValidLeft.csv +11 -0
- teradataml/data/Convolve2ValidRight.csv +11 -0
- teradataml/data/DFFTConv_Real_8_8.csv +65 -0
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/Mall_customer_data.csv +201 -0
- teradataml/data/Orders1_12mf.csv +25 -0
- teradataml/data/Pi_loan.csv +7 -0
- teradataml/data/SMOOTHED_DATA.csv +7 -0
- teradataml/data/TestDFFT8.csv +9 -0
- teradataml/data/TestRiver.csv +109 -0
- teradataml/data/Traindata.csv +28 -0
- teradataml/data/__init__.py +0 -0
- teradataml/data/acf.csv +17 -0
- teradataml/data/adaboost_example.json +34 -0
- teradataml/data/adaboostpredict_example.json +24 -0
- teradataml/data/additional_table.csv +11 -0
- teradataml/data/admissions_test.csv +21 -0
- teradataml/data/admissions_train.csv +41 -0
- teradataml/data/admissions_train_nulls.csv +41 -0
- teradataml/data/advertising.csv +201 -0
- teradataml/data/ageandheight.csv +13 -0
- teradataml/data/ageandpressure.csv +31 -0
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/antiselect_example.json +36 -0
- teradataml/data/antiselect_input.csv +8 -0
- teradataml/data/antiselect_input_mixed_case.csv +8 -0
- teradataml/data/applicant_external.csv +7 -0
- teradataml/data/applicant_reference.csv +7 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/arima_example.json +9 -0
- teradataml/data/assortedtext_input.csv +8 -0
- teradataml/data/attribution_example.json +34 -0
- teradataml/data/attribution_sample_table.csv +27 -0
- teradataml/data/attribution_sample_table1.csv +6 -0
- teradataml/data/attribution_sample_table2.csv +11 -0
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bank_web_clicks1.csv +43 -0
- teradataml/data/bank_web_clicks2.csv +91 -0
- teradataml/data/bank_web_url.csv +85 -0
- teradataml/data/barrier.csv +2 -0
- teradataml/data/barrier_new.csv +3 -0
- teradataml/data/betweenness_example.json +14 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/bin_breaks.csv +8 -0
- teradataml/data/bin_fit_ip.csv +4 -0
- teradataml/data/binary_complex_left.csv +11 -0
- teradataml/data/binary_complex_right.csv +11 -0
- teradataml/data/binary_matrix_complex_left.csv +21 -0
- teradataml/data/binary_matrix_complex_right.csv +21 -0
- teradataml/data/binary_matrix_real_left.csv +21 -0
- teradataml/data/binary_matrix_real_right.csv +21 -0
- teradataml/data/blood2ageandweight.csv +26 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/boston.csv +507 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/buoydata_mix.csv +11 -0
- teradataml/data/burst_data.csv +5 -0
- teradataml/data/burst_example.json +21 -0
- teradataml/data/byom_example.json +34 -0
- teradataml/data/bytes_table.csv +4 -0
- teradataml/data/cal_housing_ex_raw.csv +70 -0
- teradataml/data/callers.csv +7 -0
- teradataml/data/calls.csv +10 -0
- teradataml/data/cars_hist.csv +33 -0
- teradataml/data/cat_table.csv +25 -0
- teradataml/data/ccm_example.json +32 -0
- teradataml/data/ccm_input.csv +91 -0
- teradataml/data/ccm_input2.csv +13 -0
- teradataml/data/ccmexample.csv +101 -0
- teradataml/data/ccmprepare_example.json +9 -0
- teradataml/data/ccmprepare_input.csv +91 -0
- teradataml/data/cfilter_example.json +12 -0
- teradataml/data/changepointdetection_example.json +18 -0
- teradataml/data/changepointdetectionrt_example.json +8 -0
- teradataml/data/chi_sq.csv +3 -0
- teradataml/data/churn_data.csv +14 -0
- teradataml/data/churn_emission.csv +35 -0
- teradataml/data/churn_initial.csv +3 -0
- teradataml/data/churn_state_transition.csv +5 -0
- teradataml/data/citedges_2.csv +745 -0
- teradataml/data/citvertices_2.csv +1210 -0
- teradataml/data/clicks2.csv +16 -0
- teradataml/data/clickstream.csv +13 -0
- teradataml/data/clickstream1.csv +11 -0
- teradataml/data/closeness_example.json +16 -0
- teradataml/data/complaints.csv +21 -0
- teradataml/data/complaints_mini.csv +3 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_testtoken.csv +224 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/complaints_tokens_test.csv +353 -0
- teradataml/data/complaints_traintoken.csv +472 -0
- teradataml/data/computers_category.csv +1001 -0
- teradataml/data/computers_test1.csv +1252 -0
- teradataml/data/computers_train1.csv +5009 -0
- teradataml/data/computers_train1_clustered.csv +5009 -0
- teradataml/data/confusionmatrix_example.json +9 -0
- teradataml/data/conversion_event_table.csv +3 -0
- teradataml/data/corr_input.csv +17 -0
- teradataml/data/correlation_example.json +11 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/coxhazardratio_example.json +39 -0
- teradataml/data/coxph_example.json +15 -0
- teradataml/data/coxsurvival_example.json +28 -0
- teradataml/data/cpt.csv +41 -0
- teradataml/data/credit_ex_merged.csv +45 -0
- teradataml/data/creditcard_data.csv +1001 -0
- teradataml/data/customer_loyalty.csv +301 -0
- teradataml/data/customer_loyalty_newseq.csv +31 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +173 -0
- teradataml/data/decisionforest_example.json +37 -0
- teradataml/data/decisionforestpredict_example.json +38 -0
- teradataml/data/decisiontree_example.json +21 -0
- teradataml/data/decisiontreepredict_example.json +45 -0
- teradataml/data/dfft2_size4_real.csv +17 -0
- teradataml/data/dfft2_test_matrix16.csv +17 -0
- teradataml/data/dfft2conv_real_4_4.csv +65 -0
- teradataml/data/diabetes.csv +443 -0
- teradataml/data/diabetes_test.csv +89 -0
- teradataml/data/dict_table.csv +5 -0
- teradataml/data/docperterm_table.csv +4 -0
- teradataml/data/docs/__init__.py +1 -0
- teradataml/data/docs/byom/__init__.py +0 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
- teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
- teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
- teradataml/data/docs/byom/docs/__init__.py +0 -0
- teradataml/data/docs/sqle/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
- teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
- teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
- teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
- teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
- teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
- teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
- teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
- teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
- teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
- teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
- teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
- teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
- teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
- teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
- teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
- teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
- teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
- teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
- teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
- teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
- teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
- teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/tableoperator/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
- teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
- teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
- teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/uaf/__init__.py +0 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
- teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
- teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
- teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
- teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
- teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
- teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
- teradataml/data/dtw_example.json +18 -0
- teradataml/data/dtw_t1.csv +11 -0
- teradataml/data/dtw_t2.csv +4 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt2d_example.json +16 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_example.json +15 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/dwt_filter_dim.csv +5 -0
- teradataml/data/emission.csv +9 -0
- teradataml/data/emp_table_by_dept.csv +19 -0
- teradataml/data/employee_info.csv +4 -0
- teradataml/data/employee_table.csv +6 -0
- teradataml/data/excluding_event_table.csv +2 -0
- teradataml/data/finance_data.csv +6 -0
- teradataml/data/finance_data2.csv +61 -0
- teradataml/data/finance_data3.csv +93 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/fish.csv +160 -0
- teradataml/data/fm_blood2ageandweight.csv +26 -0
- teradataml/data/fmeasure_example.json +12 -0
- teradataml/data/followers_leaders.csv +10 -0
- teradataml/data/fpgrowth_example.json +12 -0
- teradataml/data/frequentpaths_example.json +29 -0
- teradataml/data/friends.csv +9 -0
- teradataml/data/fs_input.csv +33 -0
- teradataml/data/fs_input1.csv +33 -0
- teradataml/data/genData.csv +513 -0
- teradataml/data/geodataframe_example.json +40 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/glm_admissions_model.csv +12 -0
- teradataml/data/glm_example.json +56 -0
- teradataml/data/glml1l2_example.json +28 -0
- teradataml/data/glml1l2predict_example.json +54 -0
- teradataml/data/glmpredict_example.json +54 -0
- teradataml/data/gq_t1.csv +21 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/hconvolve_complex_right.csv +5 -0
- teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
- teradataml/data/histogram_example.json +12 -0
- teradataml/data/hmmdecoder_example.json +79 -0
- teradataml/data/hmmevaluator_example.json +25 -0
- teradataml/data/hmmsupervised_example.json +10 -0
- teradataml/data/hmmunsupervised_example.json +8 -0
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/house_values.csv +12 -0
- teradataml/data/house_values2.csv +13 -0
- teradataml/data/housing_cat.csv +7 -0
- teradataml/data/housing_data.csv +9 -0
- teradataml/data/housing_test.csv +47 -0
- teradataml/data/housing_test_binary.csv +47 -0
- teradataml/data/housing_train.csv +493 -0
- teradataml/data/housing_train_attribute.csv +5 -0
- teradataml/data/housing_train_binary.csv +437 -0
- teradataml/data/housing_train_parameter.csv +2 -0
- teradataml/data/housing_train_response.csv +493 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/ibm_stock.csv +370 -0
- teradataml/data/ibm_stock1.csv +370 -0
- teradataml/data/identitymatch_example.json +22 -0
- teradataml/data/idf_table.csv +4 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/impressions.csv +101 -0
- teradataml/data/inflation.csv +21 -0
- teradataml/data/initial.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/insect_sprays.csv +13 -0
- teradataml/data/insurance.csv +1339 -0
- teradataml/data/interpolator_example.json +13 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/iris_altinput.csv +481 -0
- teradataml/data/iris_attribute_output.csv +8 -0
- teradataml/data/iris_attribute_test.csv +121 -0
- teradataml/data/iris_attribute_train.csv +481 -0
- teradataml/data/iris_category_expect_predict.csv +31 -0
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/iris_input.csv +151 -0
- teradataml/data/iris_response_train.csv +121 -0
- teradataml/data/iris_test.csv +31 -0
- teradataml/data/iris_train.csv +121 -0
- teradataml/data/join_table1.csv +4 -0
- teradataml/data/join_table2.csv +4 -0
- teradataml/data/jsons/anly_function_name.json +7 -0
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/byom/dataikupredict.json +148 -0
- teradataml/data/jsons/byom/datarobotpredict.json +147 -0
- teradataml/data/jsons/byom/h2opredict.json +195 -0
- teradataml/data/jsons/byom/onnxembeddings.json +267 -0
- teradataml/data/jsons/byom/onnxpredict.json +187 -0
- teradataml/data/jsons/byom/pmmlpredict.json +147 -0
- teradataml/data/jsons/paired_functions.json +450 -0
- teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
- teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
- teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
- teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
- teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
- teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
- teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
- teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
- teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
- teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
- teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
- teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
- teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
- teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
- teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
- teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
- teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
- teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
- teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
- teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
- teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
- teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
- teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
- teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
- teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
- teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
- teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
- teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
- teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/kmeans_example.json +23 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/kmeans_us_arrests_data.csv +51 -0
- teradataml/data/knn_example.json +19 -0
- teradataml/data/knnrecommender_example.json +7 -0
- teradataml/data/knnrecommenderpredict_example.json +12 -0
- teradataml/data/lar_example.json +17 -0
- teradataml/data/larpredict_example.json +30 -0
- teradataml/data/lc_new_predictors.csv +5 -0
- teradataml/data/lc_new_reference.csv +9 -0
- teradataml/data/lda_example.json +9 -0
- teradataml/data/ldainference_example.json +15 -0
- teradataml/data/ldatopicsummary_example.json +9 -0
- teradataml/data/levendist_input.csv +13 -0
- teradataml/data/levenshteindistance_example.json +10 -0
- teradataml/data/linreg_example.json +10 -0
- teradataml/data/load_example_data.py +350 -0
- teradataml/data/loan_prediction.csv +295 -0
- teradataml/data/lungcancer.csv +138 -0
- teradataml/data/mappingdata.csv +12 -0
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/milk_timeseries.csv +157 -0
- teradataml/data/min_max_titanic.csv +4 -0
- teradataml/data/minhash_example.json +6 -0
- teradataml/data/ml_ratings.csv +7547 -0
- teradataml/data/ml_ratings_10.csv +2445 -0
- teradataml/data/mobile_data.csv +13 -0
- teradataml/data/model1_table.csv +5 -0
- teradataml/data/model2_table.csv +5 -0
- teradataml/data/models/License_file.txt +1 -0
- teradataml/data/models/License_file_empty.txt +0 -0
- teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
- teradataml/data/models/dr_iris_rf +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
- teradataml/data/models/iris_db_glm_model.pmml +57 -0
- teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
- teradataml/data/models/iris_kmeans_model +0 -0
- teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
- teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
- teradataml/data/modularity_example.json +12 -0
- teradataml/data/movavg_example.json +8 -0
- teradataml/data/mtx1.csv +7 -0
- teradataml/data/mtx2.csv +13 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/mvdfft8.csv +9 -0
- teradataml/data/naivebayes_example.json +10 -0
- teradataml/data/naivebayespredict_example.json +19 -0
- teradataml/data/naivebayestextclassifier2_example.json +7 -0
- teradataml/data/naivebayestextclassifier_example.json +8 -0
- teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
- teradataml/data/name_Find_configure.csv +10 -0
- teradataml/data/namedentityfinder_example.json +14 -0
- teradataml/data/namedentityfinderevaluator_example.json +10 -0
- teradataml/data/namedentityfindertrainer_example.json +6 -0
- teradataml/data/nb_iris_input_test.csv +31 -0
- teradataml/data/nb_iris_input_train.csv +121 -0
- teradataml/data/nbp_iris_model.csv +13 -0
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_extractor_text.csv +2 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/ner_sports_test2.csv +29 -0
- teradataml/data/ner_sports_train.csv +501 -0
- teradataml/data/nerevaluator_example.json +6 -0
- teradataml/data/nerextractor_example.json +18 -0
- teradataml/data/nermem_sports_test.csv +18 -0
- teradataml/data/nermem_sports_train.csv +51 -0
- teradataml/data/nertrainer_example.json +7 -0
- teradataml/data/ngrams_example.json +7 -0
- teradataml/data/notebooks/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
- teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
- teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
- teradataml/data/npath_example.json +23 -0
- teradataml/data/ntree_example.json +14 -0
- teradataml/data/numeric_strings.csv +5 -0
- teradataml/data/numerics.csv +4 -0
- teradataml/data/ocean_buoy.csv +17 -0
- teradataml/data/ocean_buoy2.csv +17 -0
- teradataml/data/ocean_buoys.csv +28 -0
- teradataml/data/ocean_buoys2.csv +10 -0
- teradataml/data/ocean_buoys_nonpti.csv +28 -0
- teradataml/data/ocean_buoys_seq.csv +29 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +92 -0
- teradataml/data/optional_event_table.csv +4 -0
- teradataml/data/orders1.csv +11 -0
- teradataml/data/orders1_12.csv +13 -0
- teradataml/data/orders_ex.csv +4 -0
- teradataml/data/pack_example.json +9 -0
- teradataml/data/package_tracking.csv +19 -0
- teradataml/data/package_tracking_pti.csv +19 -0
- teradataml/data/pagerank_example.json +13 -0
- teradataml/data/paragraphs_input.csv +6 -0
- teradataml/data/pathanalyzer_example.json +8 -0
- teradataml/data/pathgenerator_example.json +8 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/phrases.csv +7 -0
- teradataml/data/pivot_example.json +9 -0
- teradataml/data/pivot_input.csv +22 -0
- teradataml/data/playerRating.csv +31 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/postagger_example.json +7 -0
- teradataml/data/posttagger_output.csv +44 -0
- teradataml/data/production_data.csv +17 -0
- teradataml/data/production_data2.csv +7 -0
- teradataml/data/randomsample_example.json +32 -0
- teradataml/data/randomwalksample_example.json +9 -0
- teradataml/data/rank_table.csv +6 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/ref_mobile_data.csv +4 -0
- teradataml/data/ref_mobile_data_dense.csv +2 -0
- teradataml/data/ref_url.csv +17 -0
- teradataml/data/restaurant_reviews.csv +7 -0
- teradataml/data/retail_churn_table.csv +27772 -0
- teradataml/data/river_data.csv +145 -0
- teradataml/data/roc_example.json +8 -0
- teradataml/data/roc_input.csv +101 -0
- teradataml/data/rule_inputs.csv +6 -0
- teradataml/data/rule_table.csv +2 -0
- teradataml/data/sales.csv +7 -0
- teradataml/data/sales_transaction.csv +501 -0
- teradataml/data/salesdata.csv +342 -0
- teradataml/data/sample_cities.csv +3 -0
- teradataml/data/sample_shapes.csv +11 -0
- teradataml/data/sample_streets.csv +3 -0
- teradataml/data/sampling_example.json +16 -0
- teradataml/data/sax_example.json +17 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +74 -0
- teradataml/data/scale_housing.csv +11 -0
- teradataml/data/scale_housing_test.csv +6 -0
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scale_stat.csv +11 -0
- teradataml/data/scalebypartition_example.json +13 -0
- teradataml/data/scalemap_example.json +13 -0
- teradataml/data/scalesummary_example.json +12 -0
- teradataml/data/score_category.csv +101 -0
- teradataml/data/score_summary.csv +4 -0
- teradataml/data/script_example.json +10 -0
- teradataml/data/scripts/deploy_script.py +84 -0
- teradataml/data/scripts/lightgbm/dataset.template +175 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/mapper.py +16 -0
- teradataml/data/scripts/mapper_replace.py +16 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/seeds.csv +10 -0
- teradataml/data/sentenceextractor_example.json +7 -0
- teradataml/data/sentiment_extract_input.csv +11 -0
- teradataml/data/sentiment_train.csv +16 -0
- teradataml/data/sentiment_word.csv +20 -0
- teradataml/data/sentiment_word_input.csv +20 -0
- teradataml/data/sentimentextractor_example.json +24 -0
- teradataml/data/sentimenttrainer_example.json +8 -0
- teradataml/data/sequence_table.csv +10 -0
- teradataml/data/seriessplitter_example.json +8 -0
- teradataml/data/sessionize_example.json +17 -0
- teradataml/data/sessionize_table.csv +116 -0
- teradataml/data/setop_test1.csv +24 -0
- teradataml/data/setop_test2.csv +22 -0
- teradataml/data/soc_nw_edges.csv +11 -0
- teradataml/data/soc_nw_vertices.csv +8 -0
- teradataml/data/souvenir_timeseries.csv +168 -0
- teradataml/data/sparse_iris_attribute.csv +5 -0
- teradataml/data/sparse_iris_test.csv +121 -0
- teradataml/data/sparse_iris_train.csv +601 -0
- teradataml/data/star1.csv +6 -0
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/state_transition.csv +5 -0
- teradataml/data/stock_data.csv +53 -0
- teradataml/data/stock_movement.csv +11 -0
- teradataml/data/stock_vol.csv +76 -0
- teradataml/data/stop_words.csv +8 -0
- teradataml/data/store_sales.csv +37 -0
- teradataml/data/stringsimilarity_example.json +8 -0
- teradataml/data/strsimilarity_input.csv +13 -0
- teradataml/data/students.csv +101 -0
- teradataml/data/svm_iris_input_test.csv +121 -0
- teradataml/data/svm_iris_input_train.csv +481 -0
- teradataml/data/svm_iris_model.csv +7 -0
- teradataml/data/svmdense_example.json +10 -0
- teradataml/data/svmdensepredict_example.json +19 -0
- teradataml/data/svmsparse_example.json +8 -0
- teradataml/data/svmsparsepredict_example.json +14 -0
- teradataml/data/svmsparsesummary_example.json +8 -0
- teradataml/data/target_mobile_data.csv +13 -0
- teradataml/data/target_mobile_data_dense.csv +5 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/templatedata.csv +1201 -0
- teradataml/data/templates/open_source_ml.json +11 -0
- teradataml/data/teradata_icon.ico +0 -0
- teradataml/data/teradataml_example.json +1473 -0
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_loan_prediction.csv +53 -0
- teradataml/data/test_pacf_12.csv +37 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/test_river2.csv +109 -0
- teradataml/data/text_inputs.csv +6 -0
- teradataml/data/textchunker_example.json +8 -0
- teradataml/data/textclassifier_example.json +7 -0
- teradataml/data/textclassifier_input.csv +7 -0
- teradataml/data/textclassifiertrainer_example.json +7 -0
- teradataml/data/textmorph_example.json +11 -0
- teradataml/data/textparser_example.json +15 -0
- teradataml/data/texttagger_example.json +12 -0
- teradataml/data/texttokenizer_example.json +7 -0
- teradataml/data/texttrainer_input.csv +11 -0
- teradataml/data/tf_example.json +7 -0
- teradataml/data/tfidf_example.json +14 -0
- teradataml/data/tfidf_input1.csv +201 -0
- teradataml/data/tfidf_train.csv +6 -0
- teradataml/data/time_table1.csv +535 -0
- teradataml/data/time_table2.csv +14 -0
- teradataml/data/timeseriesdata.csv +1601 -0
- teradataml/data/timeseriesdatasetsd4.csv +105 -0
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic.csv +892 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/token_table.csv +696 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/train_tracking.csv +28 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/transformation_table.csv +6 -0
- teradataml/data/transformation_table_new.csv +2 -0
- teradataml/data/tv_spots.csv +16 -0
- teradataml/data/twod_climate_data.csv +117 -0
- teradataml/data/uaf_example.json +529 -0
- teradataml/data/univariatestatistics_example.json +9 -0
- teradataml/data/unpack_example.json +10 -0
- teradataml/data/unpivot_example.json +25 -0
- teradataml/data/unpivot_input.csv +8 -0
- teradataml/data/url_data.csv +10 -0
- teradataml/data/us_air_pass.csv +37 -0
- teradataml/data/us_population.csv +624 -0
- teradataml/data/us_states_shapes.csv +52 -0
- teradataml/data/varmax_example.json +18 -0
- teradataml/data/vectordistance_example.json +30 -0
- teradataml/data/ville_climatedata.csv +121 -0
- teradataml/data/ville_tempdata.csv +12 -0
- teradataml/data/ville_tempdata1.csv +12 -0
- teradataml/data/ville_temperature.csv +11 -0
- teradataml/data/waveletTable.csv +1605 -0
- teradataml/data/waveletTable2.csv +1605 -0
- teradataml/data/weightedmovavg_example.json +9 -0
- teradataml/data/wft_testing.csv +5 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/data/word_embed_input_table1.csv +6 -0
- teradataml/data/word_embed_input_table2.csv +5 -0
- teradataml/data/word_embed_model.csv +23 -0
- teradataml/data/words_input.csv +13 -0
- teradataml/data/xconvolve_complex_left.csv +6 -0
- teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
- teradataml/data/xgboost_example.json +36 -0
- teradataml/data/xgboostpredict_example.json +32 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/__init__.py +0 -0
- teradataml/dataframe/copy_to.py +2446 -0
- teradataml/dataframe/data_transfer.py +2840 -0
- teradataml/dataframe/dataframe.py +20908 -0
- teradataml/dataframe/dataframe_utils.py +2114 -0
- teradataml/dataframe/fastload.py +794 -0
- teradataml/dataframe/functions.py +2110 -0
- teradataml/dataframe/indexer.py +424 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +1171 -0
- teradataml/dataframe/sql.py +10904 -0
- teradataml/dataframe/sql_function_parameters.py +440 -0
- teradataml/dataframe/sql_functions.py +652 -0
- teradataml/dataframe/sql_interfaces.py +220 -0
- teradataml/dataframe/vantage_function_types.py +675 -0
- teradataml/dataframe/window.py +694 -0
- teradataml/dbutils/__init__.py +3 -0
- teradataml/dbutils/dbutils.py +2871 -0
- teradataml/dbutils/filemgr.py +318 -0
- teradataml/gen_ai/__init__.py +2 -0
- teradataml/gen_ai/convAI.py +473 -0
- teradataml/geospatial/__init__.py +4 -0
- teradataml/geospatial/geodataframe.py +1105 -0
- teradataml/geospatial/geodataframecolumn.py +392 -0
- teradataml/geospatial/geometry_types.py +926 -0
- teradataml/hyperparameter_tuner/__init__.py +1 -0
- teradataml/hyperparameter_tuner/optimizer.py +4115 -0
- teradataml/hyperparameter_tuner/utils.py +303 -0
- teradataml/lib/__init__.py +0 -0
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/lib/libaed_0_1_ppc64le.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/_base.py +1321 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/_constants.py +61 -0
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +267 -0
- teradataml/options/__init__.py +148 -0
- teradataml/options/configure.py +489 -0
- teradataml/options/display.py +187 -0
- teradataml/plot/__init__.py +3 -0
- teradataml/plot/axis.py +1427 -0
- teradataml/plot/constants.py +15 -0
- teradataml/plot/figure.py +431 -0
- teradataml/plot/plot.py +810 -0
- teradataml/plot/query_generator.py +83 -0
- teradataml/plot/subplot.py +216 -0
- teradataml/scriptmgmt/UserEnv.py +4273 -0
- teradataml/scriptmgmt/__init__.py +3 -0
- teradataml/scriptmgmt/lls_utils.py +2157 -0
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +900 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +409 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/series/__init__.py +0 -0
- teradataml/series/series.py +537 -0
- teradataml/series/series_utils.py +71 -0
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +658 -0
- teradataml/store/feature_store/feature_store.py +4814 -0
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +7330 -0
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/Apply.py +979 -0
- teradataml/table_operators/Script.py +1739 -0
- teradataml/table_operators/TableOperator.py +1343 -0
- teradataml/table_operators/__init__.py +2 -0
- teradataml/table_operators/apply_query_generator.py +262 -0
- teradataml/table_operators/query_generator.py +493 -0
- teradataml/table_operators/table_operator_query_generator.py +462 -0
- teradataml/table_operators/table_operator_util.py +726 -0
- teradataml/table_operators/templates/dataframe_apply.template +184 -0
- teradataml/table_operators/templates/dataframe_map.template +176 -0
- teradataml/table_operators/templates/dataframe_register.template +73 -0
- teradataml/table_operators/templates/dataframe_udf.template +67 -0
- teradataml/table_operators/templates/script_executor.template +170 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +53 -0
- teradataml/utils/__init__.py +0 -0
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +943 -0
- teradataml/utils/internal_buffer.py +122 -0
- teradataml/utils/print_versions.py +206 -0
- teradataml/utils/utils.py +451 -0
- teradataml/utils/validators.py +3305 -0
- teradataml-20.0.0.8.dist-info/METADATA +2804 -0
- teradataml-20.0.0.8.dist-info/RECORD +1208 -0
- teradataml-20.0.0.8.dist-info/WHEEL +5 -0
- teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
- teradataml-20.0.0.8.dist-info/zip-safe +1 -0
|
@@ -0,0 +1,1254 @@
|
|
|
1
|
+
# ##################################################################
|
|
2
|
+
#
|
|
3
|
+
# Copyright 2025 Teradata. All rights reserved.
|
|
4
|
+
# TERADATA CONFIDENTIAL AND TRADE SECRET
|
|
5
|
+
#
|
|
6
|
+
# Primary Owner: Sweta Shaw
|
|
7
|
+
# Email Id: Sweta.Shaw@Teradata.com
|
|
8
|
+
#
|
|
9
|
+
# Secondary Owner: Akhil Bisht
|
|
10
|
+
# Email Id: AKHIL.BISHT@Teradata.com
|
|
11
|
+
#
|
|
12
|
+
# Version: 1.1
|
|
13
|
+
# Function Version: 1.0
|
|
14
|
+
# ##################################################################
|
|
15
|
+
|
|
16
|
+
# Python libraries
|
|
17
|
+
import pandas as pd
|
|
18
|
+
import warnings
|
|
19
|
+
|
|
20
|
+
# Teradata libraries
|
|
21
|
+
from teradataml.dataframe.dataframe import DataFrame
|
|
22
|
+
from teradataml.dataframe.copy_to import copy_to_sql
|
|
23
|
+
from teradataml import Antiselect
|
|
24
|
+
from teradataml import BincodeTransform
|
|
25
|
+
from teradataml import ConvertTo
|
|
26
|
+
from teradataml import execute_sql
|
|
27
|
+
from teradataml import FillRowId
|
|
28
|
+
from teradataml import NonLinearCombineTransform
|
|
29
|
+
from teradataml import OneHotEncodingTransform
|
|
30
|
+
from teradataml import OrdinalEncodingTransform
|
|
31
|
+
from teradataml import RoundColumns
|
|
32
|
+
from teradataml import ScaleTransform
|
|
33
|
+
from teradataml import SimpleImputeTransform
|
|
34
|
+
from teradataml import TargetEncodingTransform
|
|
35
|
+
from teradataml import Transform, UtilFuncs, TeradataConstants
|
|
36
|
+
from teradataml.common.garbagecollector import GarbageCollector
|
|
37
|
+
from teradataml.hyperparameter_tuner.utils import _ProgressBar
|
|
38
|
+
from teradataml.options.configure import configure
|
|
39
|
+
from teradataml.common.constants import TeradataConstants
|
|
40
|
+
from teradataml.common.logger import TeradataMlLogger, get_td_logger
|
|
41
|
+
|
|
42
|
+
# AutoML Internal libraries
|
|
43
|
+
from teradataml.automl.feature_exploration import _FeatureExplore
|
|
44
|
+
from teradataml.automl.feature_engineering import _FeatureEngineering
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@TeradataMlLogger
|
|
48
|
+
class _DataTransformation(_FeatureExplore, _FeatureEngineering):
|
|
49
|
+
|
|
50
|
+
def __init__(self,
|
|
51
|
+
data,
|
|
52
|
+
data_transformation_params,
|
|
53
|
+
auto=True,
|
|
54
|
+
verbose=0,
|
|
55
|
+
target_column_ind=False,
|
|
56
|
+
table_name_mapping={},
|
|
57
|
+
cluster=False,
|
|
58
|
+
enable_lasso=False):
|
|
59
|
+
"""
|
|
60
|
+
DESCRIPTION:
|
|
61
|
+
Function initializes the data, data transformation object and running mode
|
|
62
|
+
for data transformation.
|
|
63
|
+
|
|
64
|
+
PARAMETERS:
|
|
65
|
+
data:
|
|
66
|
+
Required Argument.
|
|
67
|
+
Specifies the input teradataml Dataframe for data transformation phase.
|
|
68
|
+
Types: teradataml Dataframe
|
|
69
|
+
|
|
70
|
+
data_transformation_params:
|
|
71
|
+
Required Argument.
|
|
72
|
+
Specifies the parameters for performing data transformation.
|
|
73
|
+
Types: dict
|
|
74
|
+
|
|
75
|
+
auto:
|
|
76
|
+
Optional Argument.
|
|
77
|
+
Specifies whether to run AutoML in custom mode or auto mode.
|
|
78
|
+
When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
|
|
79
|
+
Default Value: True
|
|
80
|
+
Types: bool
|
|
81
|
+
|
|
82
|
+
verbose:
|
|
83
|
+
Optional Argument.
|
|
84
|
+
Specifies the detailed execution steps based on verbose level.
|
|
85
|
+
Default Value: 0
|
|
86
|
+
Permitted Values:
|
|
87
|
+
* 0: prints no details about data transformation.
|
|
88
|
+
* 1: prints the execution steps of data transformation.
|
|
89
|
+
* 2: prints the intermediate data between the each step of data transformation.
|
|
90
|
+
Types: int
|
|
91
|
+
|
|
92
|
+
target_column_ind:
|
|
93
|
+
Optional Argument.
|
|
94
|
+
Specifies whether target column is present in given dataset.
|
|
95
|
+
Default Value: False
|
|
96
|
+
Types: bool
|
|
97
|
+
|
|
98
|
+
table_name_mapping:
|
|
99
|
+
Optional Argument.
|
|
100
|
+
Specifies the mapping of table names for the transformed data.
|
|
101
|
+
Default Value: {}
|
|
102
|
+
Types: dict
|
|
103
|
+
|
|
104
|
+
cluster:
|
|
105
|
+
Optional Argument.
|
|
106
|
+
Specifies whether to apply clustering techniques.
|
|
107
|
+
Default Value: False
|
|
108
|
+
Types: bool
|
|
109
|
+
|
|
110
|
+
enable_lasso:
|
|
111
|
+
Optional Argument.
|
|
112
|
+
Specifies whether to use lasso regression for feature selection.
|
|
113
|
+
By default, only RFE and PCA are used for feature selection.
|
|
114
|
+
Default Value: False
|
|
115
|
+
Types: bool
|
|
116
|
+
|
|
117
|
+
RETURNS:
|
|
118
|
+
None
|
|
119
|
+
|
|
120
|
+
RAISES:
|
|
121
|
+
None
|
|
122
|
+
|
|
123
|
+
EXAMPLES:
|
|
124
|
+
>>> transformer = _DataTransformation(data=processed_df,
|
|
125
|
+
... data_transformation_params=transform_dict,
|
|
126
|
+
... auto=True,
|
|
127
|
+
... verbose=1,
|
|
128
|
+
... target_column_ind=True,
|
|
129
|
+
... table_name_mapping={"train": "train_table"},
|
|
130
|
+
... cluster=False,
|
|
131
|
+
... enable_lasso=True)
|
|
132
|
+
"""
|
|
133
|
+
self.data = data
|
|
134
|
+
self.data_transformation_params = data_transformation_params
|
|
135
|
+
self.auto = auto
|
|
136
|
+
self.verbose = verbose
|
|
137
|
+
self.target_column_ind = target_column_ind
|
|
138
|
+
self.table_name_mapping = table_name_mapping
|
|
139
|
+
self.data_types = {key: value for key, value in self.data._column_names_and_types}
|
|
140
|
+
self.data_node_id = data._nodeid
|
|
141
|
+
self.table_name_mapping[self.data_node_id] = {}
|
|
142
|
+
self.cluster = cluster
|
|
143
|
+
self.enable_lasso = enable_lasso
|
|
144
|
+
|
|
145
|
+
def data_transformation(self):
    """
    DESCRIPTION:
        Function drives the complete data transformation flow:
            1. Reads target column, classification type and id column from
               the data transformation parameters.
            2. Initializes the feature exploration and feature engineering
               parent classes.
            3. Applies the feature engineering phase transformations.
            4. Applies the data preparation phase transformations.

    PARAMETERS:
        None

    RETURNS:
        dict containing table name mapping for the transformed data.

    RAISES:
        None

    EXAMPLES:
        >>> transformer = _DataTransformation(data=processed_df,
        ...                                   data_transformation_params=transform_dict,
        ...                                   auto=True,
        ...                                   verbose=1,
        ...                                   target_column_ind=True,
        ...                                   table_name_mapping={"train": "train_table"},
        ...                                   cluster=False,
        ...                                   enable_lasso=True)
        >>> table_mapping = transformer.data_transformation()
    """
    params = self.data_transformation_params

    # Target column, problem type and id column come from the parameters.
    self.data_target_column = params.get("data_target_column")
    self.classification_type = params.get("classification_type", False)
    self.id_column = params.get("data_id_column")

    # Both parent classes are initialized explicitly with their own arguments.
    _FeatureExplore.__init__(self,
                             data=self.data,
                             target_column=self.data_target_column,
                             verbose=self.verbose,
                             cluster=self.cluster)
    _FeatureEngineering.__init__(self,
                                 data=self.data,
                                 target_column=self.data_target_column,
                                 id_column=self.id_column,
                                 model_list=None,
                                 verbose=self.verbose,
                                 cluster=self.cluster,
                                 enable_lasso=self.enable_lasso)

    self._display_msg(msg="Data Transformation started ...", show_data=True)

    # Number of progress bar jobs depends on the mode of execution.
    # Note: cluster and enable_lasso are mutually exclusive.
    if self.cluster:
        jobs = 9
    else:
        auto_jobs = 10 if self.enable_lasso else 9
        # Custom mode runs five more steps than auto mode.
        jobs = auto_jobs if self.auto else auto_jobs + 5

    self.progress_bar = _ProgressBar(jobs=jobs, verbose=2, prefix='Transformation Running:')

    # Feature engineering phase first, then data preparation phase.
    self.feature_engineering_transformation()
    self.data_preparation_transformation()

    self._display_msg(msg="Data Transformation completed.", show_data=True)
    return self.table_name_mapping
|
|
213
|
+
|
|
214
|
+
def feature_engineering_transformation(self):
    """
    DESCRIPTION:
        Function replays, on the data held by this object, the steps of the
        feature engineering phase using the values stored in
        data_transformation_params. The progress bar is advanced after
        every step.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> transformer = _DataTransformation(data=processed_df,
        ...                                   data_transformation_params=transform_dict,
        ...                                   auto=True,
        ...                                   verbose=1,
        ...                                   target_column_ind=True,
        ...                                   table_name_mapping={"train": "train_table"},
        ...                                   cluster=False,
        ...                                   enable_lasso=True)
        >>> transformer.feature_engineering_transformation()
    """
    self._display_msg(msg="Performing transformation carried out in feature engineering phase ...",
                      show_data=True,
                      progress_bar=self.progress_bar)

    # Steps common to auto and custom mode.
    for step_name in ("_preprocess_transformation",
                      "_futile_column_handling_transformation"):
        getattr(self, step_name)()
        self.progress_bar.update()

    # Target column is transformed only outside clustering, and only when a
    # classification target is present in the data.
    if not self.cluster and self.target_column_ind and self.classification_type:
        self._handle_target_column_transformation()
    # NOTE(review): the progress bar is advanced here whether or not the
    # target step ran; this nesting was inferred from the job totals set in
    # data_transformation - confirm against the original indentation.
    self.progress_bar.update()

    self._date_column_handling_transformation()
    self.progress_bar.update()

    # Remaining steps depend on the run mode.
    if self.auto:
        mode_steps = ("_missing_value_handling_transformation",
                      "_categorical_encoding_transformation")
    else:
        mode_steps = ("_custom_missing_value_handling_transformation",
                      "_custom_bincode_column_transformation",
                      "_custom_string_column_transformation",
                      "_custom_categorical_encoding_transformation",
                      "_custom_mathematical_transformation",
                      "_custom_non_linear_transformation",
                      "_custom_anti_select_column_transformation")

    for step_name in mode_steps:
        getattr(self, step_name)()
        self.progress_bar.update()
|
|
288
|
+
|
|
289
|
+
def data_preparation_transformation(self):
    """
    DESCRIPTION:
        Function replays, on the data held by this object, the steps of the
        data preparation phase using the values stored in
        data_transformation_params. The progress bar is advanced after
        every step.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> transformer = _DataTransformation(data=processed_df,
        ...                                   data_transformation_params=transform_dict,
        ...                                   auto=True,
        ...                                   verbose=1,
        ...                                   target_column_ind=True,
        ...                                   table_name_mapping={"train": "train_table"},
        ...                                   cluster=False)
        >>> transformer.data_preparation_transformation()
    """
    self._display_msg(msg="Performing transformation carried out in data preparation phase ...",
                      show_data=True,
                      progress_bar=self.progress_bar)

    # Features generated in the feature engineering phase are handled first.
    self._handle_generated_features_transformation()
    self.progress_bar.update()

    # Pick the feature selection steps for the current mode.
    if self.cluster:
        # Clustering: pca followed by the non-pca variant.
        selection_steps = ["_feature_selection_pca_transformation",
                           "_feature_selection_non_pca_transformation"]
    else:
        # Otherwise: optional lasso, then rfe and pca.
        selection_steps = []
        if self.enable_lasso:
            selection_steps.append("_feature_selection_lasso_transformation")
        selection_steps.append("_feature_selection_rfe_transformation")
        selection_steps.append("_feature_selection_pca_transformation")

    for step_name in selection_steps:
        getattr(self, step_name)()
        self.progress_bar.update()
|
|
337
|
+
|
|
338
|
+
def _preprocess_transformation(self):
|
|
339
|
+
"""
|
|
340
|
+
DESCRIPTION:
|
|
341
|
+
Function drops irrelevent columns and adds id column.
|
|
342
|
+
|
|
343
|
+
PARAMETERS:
|
|
344
|
+
None
|
|
345
|
+
|
|
346
|
+
RETURNS:
|
|
347
|
+
None
|
|
348
|
+
|
|
349
|
+
RAISES:
|
|
350
|
+
None
|
|
351
|
+
|
|
352
|
+
EXAMPLES:
|
|
353
|
+
>>> self._preprocess_transformation()
|
|
354
|
+
"""
|
|
355
|
+
# Extracting irrelevant column list
|
|
356
|
+
columns_to_be_removed = self.data_transformation_params.get("drop_irrelevant_columns", None)
|
|
357
|
+
if columns_to_be_removed:
|
|
358
|
+
self.data = self.data.drop(columns_to_be_removed, axis=1)
|
|
359
|
+
self._display_msg(msg="Updated dataset after dropping irrelevant columns :",
|
|
360
|
+
data=self.data,
|
|
361
|
+
progress_bar=self.progress_bar)
|
|
362
|
+
|
|
363
|
+
# Adding id column extracted from data transformation parameters
|
|
364
|
+
if self.id_column == 'automl_id':
|
|
365
|
+
self.data = FillRowId(data=self.data, row_id_column=self.id_column).result
|
|
366
|
+
|
|
367
|
+
self.table_name_mapping[self.data_node_id]["raw_data_with_id"] = self.data._table_name
|
|
368
|
+
|
|
369
|
+
def _futile_column_handling_transformation(self):
|
|
370
|
+
"""
|
|
371
|
+
DESCRIPTION:
|
|
372
|
+
Function drops futile columns from dataset.
|
|
373
|
+
|
|
374
|
+
PARAMETERS:
|
|
375
|
+
None
|
|
376
|
+
|
|
377
|
+
RETURNS:
|
|
378
|
+
None
|
|
379
|
+
|
|
380
|
+
RAISES:
|
|
381
|
+
None
|
|
382
|
+
|
|
383
|
+
EXAMPLES:
|
|
384
|
+
>>> self._futile_column_handling_transformation()
|
|
385
|
+
"""
|
|
386
|
+
# Extracting futile column list
|
|
387
|
+
futile_cols = self.data_transformation_params.get("futile_columns", None)
|
|
388
|
+
if futile_cols:
|
|
389
|
+
self.data = self.data.drop(futile_cols, axis=1)
|
|
390
|
+
self._display_msg(msg="Updated dataset after dropping futile columns :",
|
|
391
|
+
data=self.data,
|
|
392
|
+
progress_bar=self.progress_bar)
|
|
393
|
+
|
|
394
|
+
def _date_column_handling_transformation(self):
|
|
395
|
+
"""
|
|
396
|
+
DESCRIPTION:
|
|
397
|
+
Function performs transformation on date columns and generates new columns.
|
|
398
|
+
|
|
399
|
+
PARAMETERS:
|
|
400
|
+
None
|
|
401
|
+
|
|
402
|
+
RETURNS:
|
|
403
|
+
None
|
|
404
|
+
|
|
405
|
+
RAISES:
|
|
406
|
+
None
|
|
407
|
+
|
|
408
|
+
EXAMPLES:
|
|
409
|
+
>>> self._date_column_handling_transformation()
|
|
410
|
+
"""
|
|
411
|
+
# Extracting date columns
|
|
412
|
+
self.date_column_list = self.data_transformation_params.get("date_columns",None)
|
|
413
|
+
if self.date_column_list:
|
|
414
|
+
# Dropping rows with null values in date columns
|
|
415
|
+
self.data = self.data.dropna(subset=self.date_column_list)
|
|
416
|
+
# Extracting unique date columns for dropping
|
|
417
|
+
drop_unique_date_columns = self.data_transformation_params.get("drop_unique_date_columns",None)
|
|
418
|
+
if drop_unique_date_columns:
|
|
419
|
+
self.data = self.data.drop(drop_unique_date_columns, axis=1)
|
|
420
|
+
# Updated date column list after dropping irrelevant date columns
|
|
421
|
+
self.date_column_list = [item for item in self.date_column_list if item not in drop_unique_date_columns]
|
|
422
|
+
|
|
423
|
+
if len(self.date_column_list) != 0:
|
|
424
|
+
# Extracting date components parameters for new columns generation
|
|
425
|
+
new_columns=self._fetch_date_component()
|
|
426
|
+
|
|
427
|
+
# Extracting irrelevant date component columns for dropping
|
|
428
|
+
drop_extract_date_columns = self.data_transformation_params.get("drop_extract_date_columns", None)
|
|
429
|
+
if drop_extract_date_columns:
|
|
430
|
+
self.data = self.data.drop(drop_extract_date_columns, axis=1)
|
|
431
|
+
new_columns = [item for item in new_columns if item not in drop_extract_date_columns]
|
|
432
|
+
|
|
433
|
+
self._display_msg(msg='Updated list of newly generated features from existing date features :',
|
|
434
|
+
col_lst=new_columns)
|
|
435
|
+
self._display_msg(msg="Updated dataset after transforming date columns :",
|
|
436
|
+
data=self.data,
|
|
437
|
+
progress_bar=self.progress_bar)
|
|
438
|
+
|
|
439
|
+
def _missing_value_handling_transformation(self):
    """
    DESCRIPTION:
        Function handles missing values in three stages:
            1. Drops the columns listed under "drop_missing_columns".
            2. Imputes columns using the stored imputation fit object.
            3. Drops any rows that still contain missing values.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._missing_value_handling_transformation()
    """
    params = self.data_transformation_params

    # Stage 1: columns recorded for dropping.
    missing_cols_to_drop = params.get("drop_missing_columns")
    if missing_cols_to_drop:
        self.data = self.data.drop(missing_cols_to_drop, axis=1)
        self._display_msg(msg="Updated dataset after dropping missing value containing columns : ",
                          data=self.data,
                          progress_bar=self.progress_bar)

    # Stage 2: imputation with the stored fit object.
    if params.get("imputation_columns"):
        fit_object = params.get("imputation_fit_object")
        # Workaround for bug https://teradata-pe.atlassian.net/browse/TDAF-15617:
        # "imputation_partition_column" is deliberately not forwarded as
        # data_partition_column / object_partition_column.
        self.data = SimpleImputeTransform(data=self.data, object=fit_object).result
        self._display_msg(msg="Updated dataset after imputing missing value containing columns :",
                          data=self.data,
                          progress_bar=self.progress_bar)

    # Stage 3: temporary solution for the remaining nulls, subject to change
    # based on input - rows that still hold a null are dropped.
    rows_without_nulls = self.data.dropna()
    rows_removed = self.data.shape[0] - rows_without_nulls.shape[0]
    if rows_removed > 0:
        # First message shows the data before the rows are removed.
        self._display_msg(msg="Found additional {} rows that contain missing values :".format(rows_removed),
                          data=self.data,
                          progress_bar=self.progress_bar)
        self.data = rows_without_nulls
        self._display_msg(msg="Updated dataset after dropping additional missing value containing rows :",
                          data=self.data,
                          progress_bar=self.progress_bar)
|
|
497
|
+
|
|
498
|
+
def _custom_missing_value_handling_transformation(self):
    """
    DESCRIPTION:
        Function applies the user customized missing value handling
        (column dropping and imputation) and then runs the default
        missing value handling on whatever remains.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._custom_missing_value_handling_transformation()
    """
    params = self.data_transformation_params

    # Columns recorded for dropping in custom mode.
    custom_cols_to_drop = params.get("custom_drop_missing_columns")
    if custom_cols_to_drop:
        self.data = self.data.drop(custom_cols_to_drop, axis=1)
        self._display_msg(msg="Updated dataset after dropping customized missing value containing columns :",
                          data=self.data,
                          progress_bar=self.progress_bar)

    # Customized imputation runs only when its indicator is set.
    if params.get("custom_imputation_ind", False):
        custom_fit_object = params.get("custom_imputation_fit_object")
        imputed = SimpleImputeTransform(data=self.data, object=custom_fit_object)
        self.data = imputed.result
        self._display_msg(msg="Updated dataset after imputing customized missing value containing columns :",
                          data=self.data,
                          progress_bar=self.progress_bar)

    # Default handling takes care of everything left.
    self._missing_value_handling_transformation()
|
|
536
|
+
|
|
537
|
+
def _custom_bincode_column_transformation(self):
    """
    DESCRIPTION:
        Function applies the user customized bincode transformation,
        first for the Equal-Width columns and then for the Variable-Width
        columns, using the stored fit objects.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._custom_bincode_column_transformation()
    """
    params = self.data_transformation_params
    # Nothing to do unless customized binning is flagged.
    if not params.get("custom_bincode_ind", False):
        return

    # One entry per binning method, in execution order:
    # (columns key, fit object key, extra transform arguments, message).
    binning_specs = (
        ("custom_eql_bincode_col",
         "custom_eql_bincode_fit_object",
         {},
         "Updated dataset after performing customized equal width bin-code transformation :"),
        ("custom_var_bincode_col",
         "custom_var_bincode_fit_object",
         {"object_order_column" : "TD_MinValue_BINFIT"},
         "Updated dataset after performing customized variable width bin-code transformation :"),
    )

    for cols_key, fit_key, extra_args, done_msg in binning_specs:
        bincode_cols = params.get(cols_key)
        bincode_fit_object = params.get(fit_key)
        if not bincode_cols:
            continue

        # Accumulate columns are derived from the current columns and the binned ones.
        accumulate_columns = self._extract_list(self.data.columns, bincode_cols)
        transform_params = {
            "data" : self.data,
            "object" : bincode_fit_object,
            **extra_args,
            "accumulate" : accumulate_columns,
            "persist" : True,
            "display_table_name" : False
        }
        self.data = BincodeTransform(**transform_params).result
        # Persisted output table is handed to the garbage collector.
        GarbageCollector._add_to_garbagecollector(self.data._table_name)
        self._display_msg(msg=done_msg,
                          data=self.data,
                          progress_bar=self.progress_bar)
|
|
599
|
+
|
|
600
|
+
def _custom_string_column_transformation(self):
|
|
601
|
+
"""
|
|
602
|
+
DESCRIPTION:
|
|
603
|
+
Function performs string column transformation on categorical columns based on user input.
|
|
604
|
+
|
|
605
|
+
PARAMETERS:
|
|
606
|
+
None
|
|
607
|
+
|
|
608
|
+
RETURNS:
|
|
609
|
+
None
|
|
610
|
+
|
|
611
|
+
RAISES:
|
|
612
|
+
None
|
|
613
|
+
|
|
614
|
+
EXAMPLES:
|
|
615
|
+
>>> self._custom_string_column_transformation()
|
|
616
|
+
"""
|
|
617
|
+
# Extracting custom string manipulation columns and fit object for performing string manipulation
|
|
618
|
+
custom_string_manipulation_ind = self.data_transformation_params.get("custom_string_manipulation_ind", False)
|
|
619
|
+
if custom_string_manipulation_ind:
|
|
620
|
+
custom_string_manipulation_param = self.data_transformation_params.get('custom_string_manipulation_param', None)
|
|
621
|
+
# Performing string manipulation for each column
|
|
622
|
+
for target_col,transform_val in custom_string_manipulation_param.items():
|
|
623
|
+
self.data = self._str_method_mapping(target_col, transform_val)
|
|
624
|
+
self._display_msg(msg="Updated dataset after performing customized string manipulation :",
|
|
625
|
+
data=self.data,
|
|
626
|
+
progress_bar=self.progress_bar)
|
|
627
|
+
|
|
628
|
+
def _categorical_encoding_transformation(self):
    """
    DESCRIPTION:
        Function performs the default encoding transformation on
        categorical columns. When one hot encoding was recorded, every
        stored one hot fit object is applied and the function returns.
        Otherwise, when automatic target encoding was recorded, the
        stored target encoding fit object is applied.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._categorical_encoding_transformation()
    """
    params = self.data_transformation_params

    if params.get("one_hot_encoding_ind", False):
        ohe_fit_objects = params.get("one_hot_encoding_fit_obj", None)
        ohe_drop_list = params.get("one_hot_encoding_drop_list", None)

        # Every stored fit object is applied in turn on the running result.
        for fit_obj in ohe_fit_objects.values():
            encoded = OneHotEncodingTransform(data=self.data,
                                              object=fit_obj,
                                              is_input_dense=True,
                                              persist=True,
                                              display_table_name=False)
            self.data = encoded.result
            # Persisted output table is handed to the garbage collector.
            GarbageCollector._add_to_garbagecollector(self.data._table_name)

        # NOTE(review): original columns are dropped once, after all fit
        # objects were applied; nesting inferred because the source lost
        # its indentation - confirm.
        self.data = self.data.drop(ohe_drop_list, axis=1)
        self._display_msg(msg="Updated dataset after performing categorical encoding :",
                          data=self.data,
                          progress_bar=self.progress_bar)
        return

    # AutoFraud Routine
    if not params.get("auto_target_encoding_ind", False):
        return

    # Performing target encoding transformation.
    target_encoded = TargetEncodingTransform(
        data=self.data,
        object=params.get("auto_target_encoding_fit_obj", None),
        accumulate=params.get("target_encoding_accumulate_columns"),
        is_input_dense=True,
        persist=True,
        display_table_name=False)
    self.data = target_encoded.result

    # Persisted output table is handed to the garbage collector.
    GarbageCollector._add_to_garbagecollector(self.data._table_name)

    self._display_msg(msg="Updated dataset after performing categorical encoding :",
                      data=self.data,
                      progress_bar=self.progress_bar)
|
|
695
|
+
|
|
696
|
+
def _custom_categorical_encoding_transformation(self):
    """
    DESCRIPTION:
        Function performs custom encoding transformation on categorical
        columns based on user input (ordinal encoding and target encoding)
        and then runs the default categorical encoding transformation.
        A warning is issued for target encoded columns that contain the
        value -1 after transformation.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._custom_categorical_encoding_transformation()
    """
    params = self.data_transformation_params

    # Extracting custom encoding parameters for performing encoding
    if params.get("custom_categorical_encoding_ind", False):
        # Extracting parameters for ordinal encoding
        custom_ord_encoding_fit_obj = params.get("custom_ord_encoding_fit_obj", None)
        custom_ord_encoding_col = params.get("custom_ord_encoding_col", None)
        if custom_ord_encoding_col:
            # Extracting accumulate columns
            accumulate_columns = self._extract_list(self.data.columns, custom_ord_encoding_col)
            # Adding transform parameters for performing encoding
            transform_params = {
                "data" : self.data,
                "object" : custom_ord_encoding_fit_obj,
                "accumulate" : accumulate_columns,
                "persist" : True,
                "display_table_name" : False
            }
            # Performing ordinal encoding transformation
            self.data = OrdinalEncodingTransform(**transform_params).result
            # Adding transformed data containing table to garbage collector
            GarbageCollector._add_to_garbagecollector(self.data._table_name)

        # Extracting parameters for target encoding
        custom_target_encoding_ind = params.get("custom_target_encoding_ind", False)
        custom_target_encoding_fit_obj = params.get("custom_target_encoding_fit_obj", None)
        if custom_target_encoding_ind:
            warn_cols = []
            for col, tar_fit_obj in custom_target_encoding_fit_obj.items():
                # Extracting accumulate columns
                accumulate_columns = self._extract_list(self.data.columns, [col])
                # Adding transform parameters for performing encoding
                transform_params = {
                    "data" : self.data,
                    "object" : tar_fit_obj,
                    "accumulate" : accumulate_columns,
                    "persist" : True,
                    "display_table_name" : False
                }
                # Performing target encoding transformation
                self.data = TargetEncodingTransform(**transform_params).result
                # Adding transformed data containing table to garbage collector
                GarbageCollector._add_to_garbagecollector(self.data._table_name)
                # A -1 in the encoded column is reported below as an unseen value.
                if self.data[self.data[col] == -1].shape[0] > 0:
                    warn_cols.append(col)

            # Checking for unseen values in target encoding columns
            if warn_cols:
                # Adjacent literals are used instead of a backslash continuation
                # inside the string, which would embed the source indentation
                # in the message.
                warnings.warn(message=f"Unseen categorical values found in test data column(s): {warn_cols}. "
                                      "This may cause inaccurate predictions. "
                                      "Consider retraining the model with updated data.",
                              stacklevel=0)

        self._display_msg(msg="Updated dataset after performing customized categorical encoding :",
                          data=self.data,
                          progress_bar=self.progress_bar)

    # Handling rest with default categorical encoding transformation
    self._categorical_encoding_transformation()
|
|
769
|
+
|
|
770
|
+
def _custom_mathematical_transformation(self):
    """
    DESCRIPTION:
        Function applies the user requested mathematical transformations on
        numerical columns of "self.data". When the indicator
        "custom_mathematical_transformation_ind" is set in the data
        transformation parameters, the numapply transformations are applied
        column by column, followed by the numerical transformation driven by
        the stored fit object (when one is present). The resulting dataset is
        then displayed.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._custom_mathematical_transformation()
    """
    params = self.data_transformation_params

    # Nothing to do unless custom mathematical transformation was requested
    if not params.get("custom_mathematical_transformation_ind", False):
        return

    # Column wise numapply transformation, when parameters for it are stored
    numapply_spec = params.get("custom_numapply_transformation_param", None)
    if numapply_spec:
        for column, transform_val in numapply_spec.items():
            self.data = self._numapply_transformation(column, transform_val)

    # Numerical transformation using the stored fit object, when present
    fit_object = params.get("custom_numerical_transformation_fit_object", None)
    if fit_object:
        id_columns = params.get("custom_numerical_transformation_id_columns", None)
        # Target column is removed from the id columns when the
        # target column indicator is not set
        if not self.target_column_ind and self.data_target_column in id_columns:
            id_columns = self._extract_list(id_columns, [self.data_target_column])

        self.data = Transform(data=self.data,
                              object=fit_object,
                              id_columns=id_columns,
                              persist=True,
                              display_table_name=False).result
        # Table holding the transformed data is handed to the garbage collector
        GarbageCollector._add_to_garbagecollector(self.data._table_name)

    self._display_msg(msg="Updated dataset after performing customized mathematical transformation :",
                      data=self.data,
                      progress_bar=self.progress_bar)
|
|
826
|
+
|
|
827
|
+
def _custom_non_linear_transformation(self):
    """
    DESCRIPTION:
        Function applies the user requested non-linear transformations on
        "self.data". When the indicator "custom_non_linear_transformation_ind"
        is set in the data transformation parameters, every stored fit object
        is applied in turn through NonLinearCombineTransform and the resulting
        dataset is displayed.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._custom_non_linear_transformation()
    """
    # Nothing to do unless custom non-linear transformation was requested
    if not self.data_transformation_params.get("custom_non_linear_transformation_ind", False):
        return

    # Mapping that holds one fit object per stored entry; only the
    # fit objects themselves are needed here
    fit_objects = self.data_transformation_params['custom_non_linear_transformation_fit_object']
    for fit_obj in fit_objects.values():
        # Every column currently present in the data is accumulated
        self.data = NonLinearCombineTransform(data=self.data,
                                              object=fit_obj,
                                              accumulate=self.data.columns,
                                              persist=True,
                                              display_table_name=False).result
        # Table holding the transformed data is handed to the garbage collector
        GarbageCollector._add_to_garbagecollector(self.data._table_name)

    self._display_msg(msg="Updated dataset after performing customized non-linear transformation :",
                      data=self.data,
                      progress_bar=self.progress_bar)
|
|
865
|
+
|
|
866
|
+
def _custom_anti_select_column_transformation(self):
    """
    DESCRIPTION:
        Function excludes the user specified columns from "self.data". When the
        indicator "custom_anti_select_columns_ind" is set in the data
        transformation parameters and a non-empty column list is stored under
        "custom_anti_select_columns", those columns are passed as "exclude" to
        Antiselect. The resulting dataset is then displayed.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._custom_anti_select_column_transformation()
    """
    params = self.data_transformation_params

    # Nothing to do unless custom anti-selection was requested
    if not params.get("custom_anti_select_columns_ind", False):
        return

    # Columns to be excluded from the dataset
    excluded_columns = params.get("custom_anti_select_columns", None)
    if excluded_columns:
        self.data = Antiselect(data=self.data, exclude=excluded_columns).result

    self._display_msg(msg="Updated dataset after performing customized anti-selection :",
                      data=self.data,
                      progress_bar=self.progress_bar)
|
|
898
|
+
|
|
899
|
+
def _handle_generated_features_transformation(self):
    """
    DESCRIPTION:
        Function rounds the columns listed under "round_columns" in the data
        transformation parameters to 4 precision digits using RoundColumns.
        All remaining columns of "self.data" are accumulated unchanged.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._handle_generated_features_transformation()
    """
    columns_to_round = self.data_transformation_params.get("round_columns", None)

    # Nothing to do when no columns are marked for rounding
    if not columns_to_round:
        return

    # Target column is removed from the list when the
    # target column indicator is not set
    if not self.target_column_ind and self.data_target_column in columns_to_round:
        columns_to_round = self._extract_list(columns_to_round, [self.data_target_column])

    # Columns which are not rounded are carried over as they are
    untouched_columns = self._extract_list(self.data.columns, columns_to_round)

    self.data = RoundColumns(data=self.data,
                             target_columns=columns_to_round,
                             precision_digit=4,
                             accumulate=untouched_columns,
                             persist=True,
                             display_table_name=False).result
    # Table holding the transformed data is handed to the garbage collector
    GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
937
|
+
|
|
938
|
+
def _handle_target_column_transformation(self):
    """
    DESCRIPTION:
        Function applies the stored ordinal encoding on the target column of
        "self.data". When the indicator "target_col_encode_ind" is set in the
        data transformation parameters and a fit object is stored under
        "target_col_ord_encoding_fit_obj", OrdinalEncodingTransform is run with
        every column other than the target column accumulated. The resulting
        dataset is then displayed.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._handle_target_column_transformation()
    """
    params = self.data_transformation_params

    # Nothing to do unless target column encoding was requested
    if not params.get("target_col_encode_ind", False):
        return

    # Ordinal encoding fit object for the target column
    encoding_fit_obj = params.get("target_col_ord_encoding_fit_obj", None)
    if encoding_fit_obj:
        # Every column except the target column is carried over as it is
        other_columns = self._extract_list(self.data.columns, [self.data_target_column])
        self.data = OrdinalEncodingTransform(data=self.data,
                                             object=encoding_fit_obj,
                                             accumulate=other_columns,
                                             persist=True,
                                             display_table_name=False).result
        # Table holding the transformed data is handed to the garbage collector
        GarbageCollector._add_to_garbagecollector(self.data._table_name)

    self._display_msg(msg="Updated dataset after performing target column transformation :",
                      data=self.data,
                      progress_bar=self.progress_bar)
|
|
982
|
+
|
|
983
|
+
def _extract_and_display_features(self, feature_type, feature_list):
    """
    DESCRIPTION:
        Function selects the columns given in "feature_list" from "self.data",
        displays the selection and returns it. The target column is removed
        from the list first when the target column indicator is not set.

    PARAMETERS:
        feature_type:
            Required Argument.
            Specifies the name of the feature selection method, used in the
            displayed message.
            Types: str

        feature_list:
            Required Argument.
            Specifies the list of features to be selected.
            Types: list

    RETURNS:
        Teradataml dataframe with selected features.

    RAISES:
        None

    EXAMPLES:
        >>> feature_df = self._extract_and_display_features(feature_type="lasso", feature_list=["feature1", "feature2", "feature3"])
    """
    selected_columns = feature_list
    # Target column is removed from the selection when the
    # target column indicator is not set
    if not self.target_column_ind and self.data_target_column in selected_columns:
        selected_columns = self._extract_list(selected_columns, [self.data_target_column])

    # Dataframe restricted to the selected features
    selected_df = self.data[selected_columns]

    self._display_msg(msg=f"Updated dataset after performing {feature_type} feature selection:",
                      data=selected_df,
                      progress_bar=self.progress_bar)

    return selected_df
|
|
1022
|
+
|
|
1023
|
+
def _feature_selection_lasso_transformation(self):
    """
    DESCRIPTION:
        Function selects the features stored under "lasso_features", scales
        them with the stored fit object (when one is present) and uploads the
        result to a table whose name is recorded in "self.table_name_mapping"
        under the key "lasso_test".

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._feature_selection_lasso_transformation()
    """
    params = self.data_transformation_params

    # Dataset restricted to the features selected by lasso
    lasso_df = self._extract_and_display_features("Lasso", params.get("lasso_features", None))

    # Fit object and columns used for feature scaling
    scale_fit_obj = params.get("lasso_scale_fit_obj", None)
    scale_columns = params.get("lasso_scale_col", None)
    if scale_fit_obj is not None:
        # Columns which are not scaled are carried over as they are
        unscaled_columns = self._extract_list(lasso_df.columns, scale_columns)
        lasso_df = ScaleTransform(data=lasso_df,
                                  object=scale_fit_obj,
                                  accumulate=unscaled_columns).result
        self._display_msg(msg="Updated dataset after performing scaling on Lasso selected features :",
                          data=lasso_df,
                          progress_bar=self.progress_bar)

    # Generated name may be fully qualified when configure.temp_object_type="VT",
    # hence only the table name part is kept.
    generated_name = UtilFuncs._generate_temp_table_name(prefix="lasso_test",
                                                         table_type=TeradataConstants.TERADATA_TABLE)
    table_name = UtilFuncs._extract_table_name(generated_name)

    # Recording the table name for the lasso dataset
    self.table_name_mapping[self.data_node_id]["lasso_test"] = table_name

    # Table is created as temporary only when volatile tables are configured
    copy_to_sql(df=lasso_df,
                table_name=table_name,
                if_exists="replace",
                temporary=configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE)
|
|
1071
|
+
|
|
1072
|
+
def _feature_selection_rfe_transformation(self):
    """
    DESCRIPTION:
        Function selects the features stored under "rfe_features", renames the
        columns listed under "rfe_rename_column" by prefixing them with "r_",
        scales the dataset with the stored fit object (when one is present)
        and uploads the result to a table whose name is recorded in
        "self.table_name_mapping" under the key "rfe_test".

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._feature_selection_rfe_transformation()
    """
    params = self.data_transformation_params

    # Dataset restricted to the features selected by rfe
    rfe_df = self._extract_and_display_features("RFE", params.get("rfe_features", None))

    # Columns to rename: a prefixed copy is added and the original is dropped
    columns_to_rename = params.get("rfe_rename_column", None)
    if columns_to_rename:
        prefixed_columns = {}
        for column in columns_to_rename:
            prefixed_columns[f'r_{column}'] = rfe_df[column]
        rfe_df = rfe_df.assign(drop_columns=False, **prefixed_columns)
        rfe_df = rfe_df.drop(columns_to_rename, axis=1)

    # Fit object and columns used for feature scaling
    scale_fit_obj = params.get("rfe_scale_fit_obj", None)
    scale_columns = params.get("rfe_scale_col", None)
    if scale_fit_obj is not None:
        # Columns which are not scaled are carried over as they are
        unscaled_columns = self._extract_list(rfe_df.columns, scale_columns)
        rfe_df = ScaleTransform(data=rfe_df,
                                object=scale_fit_obj,
                                accumulate=unscaled_columns).result
        self._display_msg(msg="Updated dataset after performing scaling on RFE selected features :",
                          data=rfe_df,
                          progress_bar=self.progress_bar)

    # Generated name may be fully qualified when configure.temp_object_type="VT",
    # hence only the table name part is kept.
    generated_name = UtilFuncs._generate_temp_table_name(prefix="rfe_test",
                                                         table_type=TeradataConstants.TERADATA_TABLE)
    table_name = UtilFuncs._extract_table_name(generated_name)

    # Recording the table name for the rfe dataset
    self.table_name_mapping[self.data_node_id]["rfe_test"] = table_name

    # Table is created as temporary only when volatile tables are configured
    copy_to_sql(df=rfe_df,
                table_name=table_name,
                if_exists="replace",
                temporary=configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE)
|
|
1128
|
+
|
|
1129
|
+
def _feature_selection_pca_transformation(self):
    """
    DESCRIPTION:
        Function scales "self.data" with the stored fit object (when one is
        present), converts it to a pandas dataframe, applies the stored PCA
        fit instance on the fitted columns and uploads the result, together
        with the id column and (when present) the target column, to a table
        whose name is recorded in "self.table_name_mapping" under the key
        "pca_test".

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._feature_selection_pca_transformation()
    """
    params = self.data_transformation_params

    # Fit object and columns used for feature scaling
    scale_fit_obj = params.get("pca_scale_fit_obj", None)
    scale_columns = params.get("pca_scale_col", None)

    scaled_df = self.data
    if scale_fit_obj is not None:
        # Columns which are not scaled are carried over as they are
        unscaled_columns = self._extract_list(self.data.columns, scale_columns)
        scaled_df = ScaleTransform(data=self.data,
                                   object=scale_fit_obj,
                                   accumulate=unscaled_columns).result
        self._display_msg(msg="Updated dataset after performing scaling for PCA feature selection :",
                          data=scaled_df,
                          progress_bar=self.progress_bar)

    # PCA is applied on a pandas dataframe
    scaled_pd = scaled_df.to_pandas().reset_index()

    # Fitted PCA instance and the columns it was fitted on
    pca_instance = params.get("pca_fit_instance", None)
    fitted_columns = params.get("pca_fit_columns", None)

    # Id column, and target column when present, are kept out of PCA
    if self.target_column_ind:
        excluded_columns = [self.id_column, self.data_target_column]
    else:
        excluded_columns = [self.id_column]

    # Columns are arranged in the order used during PCA fitting to
    # avoid issues during PCA transformation.
    pca_input = scaled_pd.drop(columns=excluded_columns, axis=1)[fitted_columns]

    # Applying pca and wrapping the output in a pandas dataframe
    components = pd.DataFrame(pca_instance.transform(pca_input))

    # Renaming pca columns using the stored mapping
    components = components.rename(columns=params.get("pca_new_column", None))

    # Id column is placed ahead of the pca columns
    id_values = scaled_pd.reset_index(drop=True)[self.id_column]
    pca_df = pd.concat([id_values, components.reset_index(drop=True)], axis=1)

    # Target column is added back when present
    if self.target_column_ind:
        pca_df[self.data_target_column] = scaled_pd[self.data_target_column].reset_index(drop=True)

    self._display_msg(msg="Updated dataset after performing PCA feature selection :",
                      data=pca_df.head(10),
                      progress_bar=self.progress_bar)

    # Generated name may be fully qualified when configure.temp_object_type="VT",
    # hence only the table name part is kept.
    generated_name = UtilFuncs._generate_temp_table_name(prefix="pca_test",
                                                         table_type=TeradataConstants.TERADATA_TABLE)
    table_name = UtilFuncs._extract_table_name(generated_name)

    # Recording the table name for the pca dataset
    self.table_name_mapping[self.data_node_id]["pca_test"] = table_name

    # Table is created as temporary only when volatile tables are configured
    copy_to_sql(df=pca_df,
                table_name=table_name,
                if_exists="replace",
                temporary=configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE)
|
|
1209
|
+
|
|
1210
|
+
def _feature_selection_non_pca_transformation(self):
    """
    DESCRIPTION:
        Function performs feature scaling on raw data for non-PCA clustering
        models. When both the fit object ("non_pca_scale_fit_obj") and the
        scaling columns ("non_pca_scale_col") are stored in the data
        transformation parameters, "self.data" is scaled and uploaded to a
        table whose name is recorded in "self.table_name_mapping" under the
        key "non_pca_test". Otherwise only a message is displayed.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._feature_selection_non_pca_transformation()
    """
    self._display_msg(msg="Running Non-PCA feature selection transformation for clustering...",
                      show_data=True,
                      progress_bar=self.progress_bar)

    # Extracting fit object and columns for scaling
    non_pca_scale_fit_obj = self.data_transformation_params.get("non_pca_scale_fit_obj", None)
    non_pca_scale_col = self.data_transformation_params.get("non_pca_scale_col", None)

    if non_pca_scale_fit_obj is None or non_pca_scale_col is None:
        self._display_msg(msg="Missing non_pca_scale_fit_obj or non_pca_scale_col in data transformation params.")
        return

    # Columns which are not scaled are carried over as they are
    accumulate_cols = self._extract_list(self.data.columns, non_pca_scale_col)

    # Scaling dataset
    scaled_df = ScaleTransform(data=self.data,
                               object=non_pca_scale_fit_obj,
                               accumulate=accumulate_cols).result

    # Displaying scaled dataset
    self._display_msg(msg="Updated dataset after performing Non-PCA scaling for clustering:",
                      data=scaled_df,
                      progress_bar=self.progress_bar)

    # Uploading non_pca dataset to table for further use
    table_name = UtilFuncs._generate_temp_table_name(prefix="non_pca_test",
                                                     table_type=TeradataConstants.TERADATA_TABLE)
    # If configure.temp_object_type="VT", _generate_temp_table_name() returns the
    # table name in fully qualified format, hence only the table name part is kept.
    table_name = UtilFuncs._extract_table_name(table_name)
    # Storing table name mapping for non_pca dataset
    self.table_name_mapping[self.data_node_id]["non_pca_test"] = table_name
    # With the VT option the table must be created as a volatile table instead
    # of being persisted, so the configured temp object type is checked.
    is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
    copy_to_sql(df=scaled_df, table_name=table_name, if_exists="replace", temporary=is_temporary)
|