teradataml 20.0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +2762 -0
- teradataml/__init__.py +78 -0
- teradataml/_version.py +11 -0
- teradataml/analytics/Transformations.py +2996 -0
- teradataml/analytics/__init__.py +82 -0
- teradataml/analytics/analytic_function_executor.py +2416 -0
- teradataml/analytics/analytic_query_generator.py +1050 -0
- teradataml/analytics/byom/H2OPredict.py +514 -0
- teradataml/analytics/byom/PMMLPredict.py +437 -0
- teradataml/analytics/byom/__init__.py +16 -0
- teradataml/analytics/json_parser/__init__.py +133 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
- teradataml/analytics/json_parser/json_store.py +191 -0
- teradataml/analytics/json_parser/metadata.py +1666 -0
- teradataml/analytics/json_parser/utils.py +805 -0
- teradataml/analytics/meta_class.py +236 -0
- teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
- teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
- teradataml/analytics/sqle/__init__.py +128 -0
- teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
- teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
- teradataml/analytics/table_operator/__init__.py +11 -0
- teradataml/analytics/uaf/__init__.py +82 -0
- teradataml/analytics/utils.py +828 -0
- teradataml/analytics/valib.py +1617 -0
- teradataml/automl/__init__.py +5835 -0
- teradataml/automl/autodataprep/__init__.py +493 -0
- teradataml/automl/custom_json_utils.py +1625 -0
- teradataml/automl/data_preparation.py +1384 -0
- teradataml/automl/data_transformation.py +1254 -0
- teradataml/automl/feature_engineering.py +2273 -0
- teradataml/automl/feature_exploration.py +1873 -0
- teradataml/automl/model_evaluation.py +488 -0
- teradataml/automl/model_training.py +1407 -0
- teradataml/catalog/__init__.py +2 -0
- teradataml/catalog/byom.py +1759 -0
- teradataml/catalog/function_argument_mapper.py +859 -0
- teradataml/catalog/model_cataloging_utils.py +491 -0
- teradataml/clients/__init__.py +0 -0
- teradataml/clients/auth_client.py +137 -0
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/clients/pkce_client.py +481 -0
- teradataml/common/__init__.py +1 -0
- teradataml/common/aed_utils.py +2078 -0
- teradataml/common/bulk_exposed_utils.py +113 -0
- teradataml/common/constants.py +1669 -0
- teradataml/common/deprecations.py +166 -0
- teradataml/common/exceptions.py +147 -0
- teradataml/common/formula.py +743 -0
- teradataml/common/garbagecollector.py +666 -0
- teradataml/common/logger.py +1261 -0
- teradataml/common/messagecodes.py +518 -0
- teradataml/common/messages.py +262 -0
- teradataml/common/pylogger.py +67 -0
- teradataml/common/sqlbundle.py +764 -0
- teradataml/common/td_coltype_code_to_tdtype.py +48 -0
- teradataml/common/utils.py +3166 -0
- teradataml/common/warnings.py +36 -0
- teradataml/common/wrapper_utils.py +625 -0
- teradataml/config/__init__.py +0 -0
- teradataml/config/dummy_file1.cfg +5 -0
- teradataml/config/dummy_file2.cfg +3 -0
- teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
- teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
- teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
- teradataml/context/__init__.py +0 -0
- teradataml/context/aed_context.py +223 -0
- teradataml/context/context.py +1462 -0
- teradataml/data/A_loan.csv +19 -0
- teradataml/data/BINARY_REALS_LEFT.csv +11 -0
- teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
- teradataml/data/B_loan.csv +49 -0
- teradataml/data/BuoyData2.csv +17 -0
- teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
- teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
- teradataml/data/Convolve2RealsLeft.csv +5 -0
- teradataml/data/Convolve2RealsRight.csv +5 -0
- teradataml/data/Convolve2ValidLeft.csv +11 -0
- teradataml/data/Convolve2ValidRight.csv +11 -0
- teradataml/data/DFFTConv_Real_8_8.csv +65 -0
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/Mall_customer_data.csv +201 -0
- teradataml/data/Orders1_12mf.csv +25 -0
- teradataml/data/Pi_loan.csv +7 -0
- teradataml/data/SMOOTHED_DATA.csv +7 -0
- teradataml/data/TestDFFT8.csv +9 -0
- teradataml/data/TestRiver.csv +109 -0
- teradataml/data/Traindata.csv +28 -0
- teradataml/data/__init__.py +0 -0
- teradataml/data/acf.csv +17 -0
- teradataml/data/adaboost_example.json +34 -0
- teradataml/data/adaboostpredict_example.json +24 -0
- teradataml/data/additional_table.csv +11 -0
- teradataml/data/admissions_test.csv +21 -0
- teradataml/data/admissions_train.csv +41 -0
- teradataml/data/admissions_train_nulls.csv +41 -0
- teradataml/data/advertising.csv +201 -0
- teradataml/data/ageandheight.csv +13 -0
- teradataml/data/ageandpressure.csv +31 -0
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/antiselect_example.json +36 -0
- teradataml/data/antiselect_input.csv +8 -0
- teradataml/data/antiselect_input_mixed_case.csv +8 -0
- teradataml/data/applicant_external.csv +7 -0
- teradataml/data/applicant_reference.csv +7 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/arima_example.json +9 -0
- teradataml/data/assortedtext_input.csv +8 -0
- teradataml/data/attribution_example.json +34 -0
- teradataml/data/attribution_sample_table.csv +27 -0
- teradataml/data/attribution_sample_table1.csv +6 -0
- teradataml/data/attribution_sample_table2.csv +11 -0
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bank_web_clicks1.csv +43 -0
- teradataml/data/bank_web_clicks2.csv +91 -0
- teradataml/data/bank_web_url.csv +85 -0
- teradataml/data/barrier.csv +2 -0
- teradataml/data/barrier_new.csv +3 -0
- teradataml/data/betweenness_example.json +14 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/bin_breaks.csv +8 -0
- teradataml/data/bin_fit_ip.csv +4 -0
- teradataml/data/binary_complex_left.csv +11 -0
- teradataml/data/binary_complex_right.csv +11 -0
- teradataml/data/binary_matrix_complex_left.csv +21 -0
- teradataml/data/binary_matrix_complex_right.csv +21 -0
- teradataml/data/binary_matrix_real_left.csv +21 -0
- teradataml/data/binary_matrix_real_right.csv +21 -0
- teradataml/data/blood2ageandweight.csv +26 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/boston.csv +507 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/buoydata_mix.csv +11 -0
- teradataml/data/burst_data.csv +5 -0
- teradataml/data/burst_example.json +21 -0
- teradataml/data/byom_example.json +34 -0
- teradataml/data/bytes_table.csv +4 -0
- teradataml/data/cal_housing_ex_raw.csv +70 -0
- teradataml/data/callers.csv +7 -0
- teradataml/data/calls.csv +10 -0
- teradataml/data/cars_hist.csv +33 -0
- teradataml/data/cat_table.csv +25 -0
- teradataml/data/ccm_example.json +32 -0
- teradataml/data/ccm_input.csv +91 -0
- teradataml/data/ccm_input2.csv +13 -0
- teradataml/data/ccmexample.csv +101 -0
- teradataml/data/ccmprepare_example.json +9 -0
- teradataml/data/ccmprepare_input.csv +91 -0
- teradataml/data/cfilter_example.json +12 -0
- teradataml/data/changepointdetection_example.json +18 -0
- teradataml/data/changepointdetectionrt_example.json +8 -0
- teradataml/data/chi_sq.csv +3 -0
- teradataml/data/churn_data.csv +14 -0
- teradataml/data/churn_emission.csv +35 -0
- teradataml/data/churn_initial.csv +3 -0
- teradataml/data/churn_state_transition.csv +5 -0
- teradataml/data/citedges_2.csv +745 -0
- teradataml/data/citvertices_2.csv +1210 -0
- teradataml/data/clicks2.csv +16 -0
- teradataml/data/clickstream.csv +13 -0
- teradataml/data/clickstream1.csv +11 -0
- teradataml/data/closeness_example.json +16 -0
- teradataml/data/complaints.csv +21 -0
- teradataml/data/complaints_mini.csv +3 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_testtoken.csv +224 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/complaints_tokens_test.csv +353 -0
- teradataml/data/complaints_traintoken.csv +472 -0
- teradataml/data/computers_category.csv +1001 -0
- teradataml/data/computers_test1.csv +1252 -0
- teradataml/data/computers_train1.csv +5009 -0
- teradataml/data/computers_train1_clustered.csv +5009 -0
- teradataml/data/confusionmatrix_example.json +9 -0
- teradataml/data/conversion_event_table.csv +3 -0
- teradataml/data/corr_input.csv +17 -0
- teradataml/data/correlation_example.json +11 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/coxhazardratio_example.json +39 -0
- teradataml/data/coxph_example.json +15 -0
- teradataml/data/coxsurvival_example.json +28 -0
- teradataml/data/cpt.csv +41 -0
- teradataml/data/credit_ex_merged.csv +45 -0
- teradataml/data/creditcard_data.csv +1001 -0
- teradataml/data/customer_loyalty.csv +301 -0
- teradataml/data/customer_loyalty_newseq.csv +31 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +173 -0
- teradataml/data/decisionforest_example.json +37 -0
- teradataml/data/decisionforestpredict_example.json +38 -0
- teradataml/data/decisiontree_example.json +21 -0
- teradataml/data/decisiontreepredict_example.json +45 -0
- teradataml/data/dfft2_size4_real.csv +17 -0
- teradataml/data/dfft2_test_matrix16.csv +17 -0
- teradataml/data/dfft2conv_real_4_4.csv +65 -0
- teradataml/data/diabetes.csv +443 -0
- teradataml/data/diabetes_test.csv +89 -0
- teradataml/data/dict_table.csv +5 -0
- teradataml/data/docperterm_table.csv +4 -0
- teradataml/data/docs/__init__.py +1 -0
- teradataml/data/docs/byom/__init__.py +0 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
- teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
- teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
- teradataml/data/docs/byom/docs/__init__.py +0 -0
- teradataml/data/docs/sqle/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
- teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
- teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
- teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
- teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
- teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
- teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
- teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
- teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
- teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
- teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
- teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
- teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
- teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
- teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
- teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
- teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
- teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
- teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
- teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
- teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
- teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
- teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/tableoperator/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
- teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
- teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
- teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/uaf/__init__.py +0 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
- teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
- teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
- teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
- teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
- teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
- teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
- teradataml/data/dtw_example.json +18 -0
- teradataml/data/dtw_t1.csv +11 -0
- teradataml/data/dtw_t2.csv +4 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt2d_example.json +16 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_example.json +15 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/dwt_filter_dim.csv +5 -0
- teradataml/data/emission.csv +9 -0
- teradataml/data/emp_table_by_dept.csv +19 -0
- teradataml/data/employee_info.csv +4 -0
- teradataml/data/employee_table.csv +6 -0
- teradataml/data/excluding_event_table.csv +2 -0
- teradataml/data/finance_data.csv +6 -0
- teradataml/data/finance_data2.csv +61 -0
- teradataml/data/finance_data3.csv +93 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/fish.csv +160 -0
- teradataml/data/fm_blood2ageandweight.csv +26 -0
- teradataml/data/fmeasure_example.json +12 -0
- teradataml/data/followers_leaders.csv +10 -0
- teradataml/data/fpgrowth_example.json +12 -0
- teradataml/data/frequentpaths_example.json +29 -0
- teradataml/data/friends.csv +9 -0
- teradataml/data/fs_input.csv +33 -0
- teradataml/data/fs_input1.csv +33 -0
- teradataml/data/genData.csv +513 -0
- teradataml/data/geodataframe_example.json +40 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/glm_admissions_model.csv +12 -0
- teradataml/data/glm_example.json +56 -0
- teradataml/data/glml1l2_example.json +28 -0
- teradataml/data/glml1l2predict_example.json +54 -0
- teradataml/data/glmpredict_example.json +54 -0
- teradataml/data/gq_t1.csv +21 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/hconvolve_complex_right.csv +5 -0
- teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
- teradataml/data/histogram_example.json +12 -0
- teradataml/data/hmmdecoder_example.json +79 -0
- teradataml/data/hmmevaluator_example.json +25 -0
- teradataml/data/hmmsupervised_example.json +10 -0
- teradataml/data/hmmunsupervised_example.json +8 -0
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/house_values.csv +12 -0
- teradataml/data/house_values2.csv +13 -0
- teradataml/data/housing_cat.csv +7 -0
- teradataml/data/housing_data.csv +9 -0
- teradataml/data/housing_test.csv +47 -0
- teradataml/data/housing_test_binary.csv +47 -0
- teradataml/data/housing_train.csv +493 -0
- teradataml/data/housing_train_attribute.csv +5 -0
- teradataml/data/housing_train_binary.csv +437 -0
- teradataml/data/housing_train_parameter.csv +2 -0
- teradataml/data/housing_train_response.csv +493 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/ibm_stock.csv +370 -0
- teradataml/data/ibm_stock1.csv +370 -0
- teradataml/data/identitymatch_example.json +22 -0
- teradataml/data/idf_table.csv +4 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/impressions.csv +101 -0
- teradataml/data/inflation.csv +21 -0
- teradataml/data/initial.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/insect_sprays.csv +13 -0
- teradataml/data/insurance.csv +1339 -0
- teradataml/data/interpolator_example.json +13 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/iris_altinput.csv +481 -0
- teradataml/data/iris_attribute_output.csv +8 -0
- teradataml/data/iris_attribute_test.csv +121 -0
- teradataml/data/iris_attribute_train.csv +481 -0
- teradataml/data/iris_category_expect_predict.csv +31 -0
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/iris_input.csv +151 -0
- teradataml/data/iris_response_train.csv +121 -0
- teradataml/data/iris_test.csv +31 -0
- teradataml/data/iris_train.csv +121 -0
- teradataml/data/join_table1.csv +4 -0
- teradataml/data/join_table2.csv +4 -0
- teradataml/data/jsons/anly_function_name.json +7 -0
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/byom/dataikupredict.json +148 -0
- teradataml/data/jsons/byom/datarobotpredict.json +147 -0
- teradataml/data/jsons/byom/h2opredict.json +195 -0
- teradataml/data/jsons/byom/onnxembeddings.json +267 -0
- teradataml/data/jsons/byom/onnxpredict.json +187 -0
- teradataml/data/jsons/byom/pmmlpredict.json +147 -0
- teradataml/data/jsons/paired_functions.json +450 -0
- teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
- teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
- teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
- teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
- teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
- teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
- teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
- teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
- teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
- teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
- teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
- teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
- teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
- teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
- teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
- teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
- teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
- teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
- teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
- teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
- teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
- teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
- teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
- teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
- teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
- teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
- teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
- teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
- teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/kmeans_example.json +23 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/kmeans_us_arrests_data.csv +51 -0
- teradataml/data/knn_example.json +19 -0
- teradataml/data/knnrecommender_example.json +7 -0
- teradataml/data/knnrecommenderpredict_example.json +12 -0
- teradataml/data/lar_example.json +17 -0
- teradataml/data/larpredict_example.json +30 -0
- teradataml/data/lc_new_predictors.csv +5 -0
- teradataml/data/lc_new_reference.csv +9 -0
- teradataml/data/lda_example.json +9 -0
- teradataml/data/ldainference_example.json +15 -0
- teradataml/data/ldatopicsummary_example.json +9 -0
- teradataml/data/levendist_input.csv +13 -0
- teradataml/data/levenshteindistance_example.json +10 -0
- teradataml/data/linreg_example.json +10 -0
- teradataml/data/load_example_data.py +350 -0
- teradataml/data/loan_prediction.csv +295 -0
- teradataml/data/lungcancer.csv +138 -0
- teradataml/data/mappingdata.csv +12 -0
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/milk_timeseries.csv +157 -0
- teradataml/data/min_max_titanic.csv +4 -0
- teradataml/data/minhash_example.json +6 -0
- teradataml/data/ml_ratings.csv +7547 -0
- teradataml/data/ml_ratings_10.csv +2445 -0
- teradataml/data/mobile_data.csv +13 -0
- teradataml/data/model1_table.csv +5 -0
- teradataml/data/model2_table.csv +5 -0
- teradataml/data/models/License_file.txt +1 -0
- teradataml/data/models/License_file_empty.txt +0 -0
- teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
- teradataml/data/models/dr_iris_rf +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
- teradataml/data/models/iris_db_glm_model.pmml +57 -0
- teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
- teradataml/data/models/iris_kmeans_model +0 -0
- teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
- teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
- teradataml/data/modularity_example.json +12 -0
- teradataml/data/movavg_example.json +8 -0
- teradataml/data/mtx1.csv +7 -0
- teradataml/data/mtx2.csv +13 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/mvdfft8.csv +9 -0
- teradataml/data/naivebayes_example.json +10 -0
- teradataml/data/naivebayespredict_example.json +19 -0
- teradataml/data/naivebayestextclassifier2_example.json +7 -0
- teradataml/data/naivebayestextclassifier_example.json +8 -0
- teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
- teradataml/data/name_Find_configure.csv +10 -0
- teradataml/data/namedentityfinder_example.json +14 -0
- teradataml/data/namedentityfinderevaluator_example.json +10 -0
- teradataml/data/namedentityfindertrainer_example.json +6 -0
- teradataml/data/nb_iris_input_test.csv +31 -0
- teradataml/data/nb_iris_input_train.csv +121 -0
- teradataml/data/nbp_iris_model.csv +13 -0
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_extractor_text.csv +2 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/ner_sports_test2.csv +29 -0
- teradataml/data/ner_sports_train.csv +501 -0
- teradataml/data/nerevaluator_example.json +6 -0
- teradataml/data/nerextractor_example.json +18 -0
- teradataml/data/nermem_sports_test.csv +18 -0
- teradataml/data/nermem_sports_train.csv +51 -0
- teradataml/data/nertrainer_example.json +7 -0
- teradataml/data/ngrams_example.json +7 -0
- teradataml/data/notebooks/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
- teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
- teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
- teradataml/data/npath_example.json +23 -0
- teradataml/data/ntree_example.json +14 -0
- teradataml/data/numeric_strings.csv +5 -0
- teradataml/data/numerics.csv +4 -0
- teradataml/data/ocean_buoy.csv +17 -0
- teradataml/data/ocean_buoy2.csv +17 -0
- teradataml/data/ocean_buoys.csv +28 -0
- teradataml/data/ocean_buoys2.csv +10 -0
- teradataml/data/ocean_buoys_nonpti.csv +28 -0
- teradataml/data/ocean_buoys_seq.csv +29 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +92 -0
- teradataml/data/optional_event_table.csv +4 -0
- teradataml/data/orders1.csv +11 -0
- teradataml/data/orders1_12.csv +13 -0
- teradataml/data/orders_ex.csv +4 -0
- teradataml/data/pack_example.json +9 -0
- teradataml/data/package_tracking.csv +19 -0
- teradataml/data/package_tracking_pti.csv +19 -0
- teradataml/data/pagerank_example.json +13 -0
- teradataml/data/paragraphs_input.csv +6 -0
- teradataml/data/pathanalyzer_example.json +8 -0
- teradataml/data/pathgenerator_example.json +8 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/phrases.csv +7 -0
- teradataml/data/pivot_example.json +9 -0
- teradataml/data/pivot_input.csv +22 -0
- teradataml/data/playerRating.csv +31 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/postagger_example.json +7 -0
- teradataml/data/posttagger_output.csv +44 -0
- teradataml/data/production_data.csv +17 -0
- teradataml/data/production_data2.csv +7 -0
- teradataml/data/randomsample_example.json +32 -0
- teradataml/data/randomwalksample_example.json +9 -0
- teradataml/data/rank_table.csv +6 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/ref_mobile_data.csv +4 -0
- teradataml/data/ref_mobile_data_dense.csv +2 -0
- teradataml/data/ref_url.csv +17 -0
- teradataml/data/restaurant_reviews.csv +7 -0
- teradataml/data/retail_churn_table.csv +27772 -0
- teradataml/data/river_data.csv +145 -0
- teradataml/data/roc_example.json +8 -0
- teradataml/data/roc_input.csv +101 -0
- teradataml/data/rule_inputs.csv +6 -0
- teradataml/data/rule_table.csv +2 -0
- teradataml/data/sales.csv +7 -0
- teradataml/data/sales_transaction.csv +501 -0
- teradataml/data/salesdata.csv +342 -0
- teradataml/data/sample_cities.csv +3 -0
- teradataml/data/sample_shapes.csv +11 -0
- teradataml/data/sample_streets.csv +3 -0
- teradataml/data/sampling_example.json +16 -0
- teradataml/data/sax_example.json +17 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +74 -0
- teradataml/data/scale_housing.csv +11 -0
- teradataml/data/scale_housing_test.csv +6 -0
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scale_stat.csv +11 -0
- teradataml/data/scalebypartition_example.json +13 -0
- teradataml/data/scalemap_example.json +13 -0
- teradataml/data/scalesummary_example.json +12 -0
- teradataml/data/score_category.csv +101 -0
- teradataml/data/score_summary.csv +4 -0
- teradataml/data/script_example.json +10 -0
- teradataml/data/scripts/deploy_script.py +84 -0
- teradataml/data/scripts/lightgbm/dataset.template +175 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/mapper.py +16 -0
- teradataml/data/scripts/mapper_replace.py +16 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/seeds.csv +10 -0
- teradataml/data/sentenceextractor_example.json +7 -0
- teradataml/data/sentiment_extract_input.csv +11 -0
- teradataml/data/sentiment_train.csv +16 -0
- teradataml/data/sentiment_word.csv +20 -0
- teradataml/data/sentiment_word_input.csv +20 -0
- teradataml/data/sentimentextractor_example.json +24 -0
- teradataml/data/sentimenttrainer_example.json +8 -0
- teradataml/data/sequence_table.csv +10 -0
- teradataml/data/seriessplitter_example.json +8 -0
- teradataml/data/sessionize_example.json +17 -0
- teradataml/data/sessionize_table.csv +116 -0
- teradataml/data/setop_test1.csv +24 -0
- teradataml/data/setop_test2.csv +22 -0
- teradataml/data/soc_nw_edges.csv +11 -0
- teradataml/data/soc_nw_vertices.csv +8 -0
- teradataml/data/souvenir_timeseries.csv +168 -0
- teradataml/data/sparse_iris_attribute.csv +5 -0
- teradataml/data/sparse_iris_test.csv +121 -0
- teradataml/data/sparse_iris_train.csv +601 -0
- teradataml/data/star1.csv +6 -0
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/state_transition.csv +5 -0
- teradataml/data/stock_data.csv +53 -0
- teradataml/data/stock_movement.csv +11 -0
- teradataml/data/stock_vol.csv +76 -0
- teradataml/data/stop_words.csv +8 -0
- teradataml/data/store_sales.csv +37 -0
- teradataml/data/stringsimilarity_example.json +8 -0
- teradataml/data/strsimilarity_input.csv +13 -0
- teradataml/data/students.csv +101 -0
- teradataml/data/svm_iris_input_test.csv +121 -0
- teradataml/data/svm_iris_input_train.csv +481 -0
- teradataml/data/svm_iris_model.csv +7 -0
- teradataml/data/svmdense_example.json +10 -0
- teradataml/data/svmdensepredict_example.json +19 -0
- teradataml/data/svmsparse_example.json +8 -0
- teradataml/data/svmsparsepredict_example.json +14 -0
- teradataml/data/svmsparsesummary_example.json +8 -0
- teradataml/data/target_mobile_data.csv +13 -0
- teradataml/data/target_mobile_data_dense.csv +5 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/templatedata.csv +1201 -0
- teradataml/data/templates/open_source_ml.json +11 -0
- teradataml/data/teradata_icon.ico +0 -0
- teradataml/data/teradataml_example.json +1473 -0
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_loan_prediction.csv +53 -0
- teradataml/data/test_pacf_12.csv +37 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/test_river2.csv +109 -0
- teradataml/data/text_inputs.csv +6 -0
- teradataml/data/textchunker_example.json +8 -0
- teradataml/data/textclassifier_example.json +7 -0
- teradataml/data/textclassifier_input.csv +7 -0
- teradataml/data/textclassifiertrainer_example.json +7 -0
- teradataml/data/textmorph_example.json +11 -0
- teradataml/data/textparser_example.json +15 -0
- teradataml/data/texttagger_example.json +12 -0
- teradataml/data/texttokenizer_example.json +7 -0
- teradataml/data/texttrainer_input.csv +11 -0
- teradataml/data/tf_example.json +7 -0
- teradataml/data/tfidf_example.json +14 -0
- teradataml/data/tfidf_input1.csv +201 -0
- teradataml/data/tfidf_train.csv +6 -0
- teradataml/data/time_table1.csv +535 -0
- teradataml/data/time_table2.csv +14 -0
- teradataml/data/timeseriesdata.csv +1601 -0
- teradataml/data/timeseriesdatasetsd4.csv +105 -0
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic.csv +892 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/token_table.csv +696 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/train_tracking.csv +28 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/transformation_table.csv +6 -0
- teradataml/data/transformation_table_new.csv +2 -0
- teradataml/data/tv_spots.csv +16 -0
- teradataml/data/twod_climate_data.csv +117 -0
- teradataml/data/uaf_example.json +529 -0
- teradataml/data/univariatestatistics_example.json +9 -0
- teradataml/data/unpack_example.json +10 -0
- teradataml/data/unpivot_example.json +25 -0
- teradataml/data/unpivot_input.csv +8 -0
- teradataml/data/url_data.csv +10 -0
- teradataml/data/us_air_pass.csv +37 -0
- teradataml/data/us_population.csv +624 -0
- teradataml/data/us_states_shapes.csv +52 -0
- teradataml/data/varmax_example.json +18 -0
- teradataml/data/vectordistance_example.json +30 -0
- teradataml/data/ville_climatedata.csv +121 -0
- teradataml/data/ville_tempdata.csv +12 -0
- teradataml/data/ville_tempdata1.csv +12 -0
- teradataml/data/ville_temperature.csv +11 -0
- teradataml/data/waveletTable.csv +1605 -0
- teradataml/data/waveletTable2.csv +1605 -0
- teradataml/data/weightedmovavg_example.json +9 -0
- teradataml/data/wft_testing.csv +5 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/data/word_embed_input_table1.csv +6 -0
- teradataml/data/word_embed_input_table2.csv +5 -0
- teradataml/data/word_embed_model.csv +23 -0
- teradataml/data/words_input.csv +13 -0
- teradataml/data/xconvolve_complex_left.csv +6 -0
- teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
- teradataml/data/xgboost_example.json +36 -0
- teradataml/data/xgboostpredict_example.json +32 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/__init__.py +0 -0
- teradataml/dataframe/copy_to.py +2446 -0
- teradataml/dataframe/data_transfer.py +2840 -0
- teradataml/dataframe/dataframe.py +20908 -0
- teradataml/dataframe/dataframe_utils.py +2114 -0
- teradataml/dataframe/fastload.py +794 -0
- teradataml/dataframe/functions.py +2110 -0
- teradataml/dataframe/indexer.py +424 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +1171 -0
- teradataml/dataframe/sql.py +10904 -0
- teradataml/dataframe/sql_function_parameters.py +440 -0
- teradataml/dataframe/sql_functions.py +652 -0
- teradataml/dataframe/sql_interfaces.py +220 -0
- teradataml/dataframe/vantage_function_types.py +675 -0
- teradataml/dataframe/window.py +694 -0
- teradataml/dbutils/__init__.py +3 -0
- teradataml/dbutils/dbutils.py +2871 -0
- teradataml/dbutils/filemgr.py +318 -0
- teradataml/gen_ai/__init__.py +2 -0
- teradataml/gen_ai/convAI.py +473 -0
- teradataml/geospatial/__init__.py +4 -0
- teradataml/geospatial/geodataframe.py +1105 -0
- teradataml/geospatial/geodataframecolumn.py +392 -0
- teradataml/geospatial/geometry_types.py +926 -0
- teradataml/hyperparameter_tuner/__init__.py +1 -0
- teradataml/hyperparameter_tuner/optimizer.py +4115 -0
- teradataml/hyperparameter_tuner/utils.py +303 -0
- teradataml/lib/__init__.py +0 -0
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/lib/libaed_0_1_ppc64le.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/_base.py +1321 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/_constants.py +61 -0
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +267 -0
- teradataml/options/__init__.py +148 -0
- teradataml/options/configure.py +489 -0
- teradataml/options/display.py +187 -0
- teradataml/plot/__init__.py +3 -0
- teradataml/plot/axis.py +1427 -0
- teradataml/plot/constants.py +15 -0
- teradataml/plot/figure.py +431 -0
- teradataml/plot/plot.py +810 -0
- teradataml/plot/query_generator.py +83 -0
- teradataml/plot/subplot.py +216 -0
- teradataml/scriptmgmt/UserEnv.py +4273 -0
- teradataml/scriptmgmt/__init__.py +3 -0
- teradataml/scriptmgmt/lls_utils.py +2157 -0
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +900 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +409 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/series/__init__.py +0 -0
- teradataml/series/series.py +537 -0
- teradataml/series/series_utils.py +71 -0
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +658 -0
- teradataml/store/feature_store/feature_store.py +4814 -0
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +7330 -0
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/Apply.py +979 -0
- teradataml/table_operators/Script.py +1739 -0
- teradataml/table_operators/TableOperator.py +1343 -0
- teradataml/table_operators/__init__.py +2 -0
- teradataml/table_operators/apply_query_generator.py +262 -0
- teradataml/table_operators/query_generator.py +493 -0
- teradataml/table_operators/table_operator_query_generator.py +462 -0
- teradataml/table_operators/table_operator_util.py +726 -0
- teradataml/table_operators/templates/dataframe_apply.template +184 -0
- teradataml/table_operators/templates/dataframe_map.template +176 -0
- teradataml/table_operators/templates/dataframe_register.template +73 -0
- teradataml/table_operators/templates/dataframe_udf.template +67 -0
- teradataml/table_operators/templates/script_executor.template +170 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +53 -0
- teradataml/utils/__init__.py +0 -0
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +943 -0
- teradataml/utils/internal_buffer.py +122 -0
- teradataml/utils/print_versions.py +206 -0
- teradataml/utils/utils.py +451 -0
- teradataml/utils/validators.py +3305 -0
- teradataml-20.0.0.8.dist-info/METADATA +2804 -0
- teradataml-20.0.0.8.dist-info/RECORD +1208 -0
- teradataml-20.0.0.8.dist-info/WHEEL +5 -0
- teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
- teradataml-20.0.0.8.dist-info/zip-safe +1 -0
|
@@ -0,0 +1,1171 @@
|
|
|
1
|
+
#!/usr/bin/python
|
|
2
|
+
# ##################################################################
|
|
3
|
+
#
|
|
4
|
+
# Copyright 2019 Teradata. All rights reserved.
|
|
5
|
+
# TERADATA CONFIDENTIAL AND TRADE SECRET
|
|
6
|
+
#
|
|
7
|
+
# Primary Owner: Rohit Khurd (rohit.khurd@teradata.com)
|
|
8
|
+
# Secondary Owner: Abhinav Sahu (abhinav.sahu@teradata.com)
|
|
9
|
+
#
|
|
10
|
+
# This file implements APIs and utility functions for set operations.
|
|
11
|
+
# ##################################################################
|
|
12
|
+
|
|
13
|
+
import inspect, importlib
|
|
14
|
+
from collections import OrderedDict
|
|
15
|
+
from teradataml.common.exceptions import TeradataMlException
|
|
16
|
+
from teradataml.common.messages import Messages
|
|
17
|
+
from teradataml.common.messagecodes import MessageCodes
|
|
18
|
+
from teradataml.common.utils import UtilFuncs
|
|
19
|
+
from teradataml.dataframe import dataframe
|
|
20
|
+
from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
|
|
21
|
+
from teradataml.common.aed_utils import AedUtils
|
|
22
|
+
from teradataml.dataframe.sql import _MetaExpression
|
|
23
|
+
from teradataml.utils.validators import _Validators
|
|
24
|
+
from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
|
|
25
|
+
from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
|
|
26
|
+
from teradatasql import OperationalError
|
|
27
|
+
|
|
28
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
29
|
+
|
|
30
|
+
module = importlib.import_module("teradataml")
|
|
31
|
+
|
|
32
|
+
def __validate_setop_args(df_list, awu_matrix, setop_type):
    """
    DESCRIPTION:
        Internal function that validates the arguments of a set operation
        before any query is generated.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames taking part in the
            set operation.
            Types: list of teradataml DataFrames

        awu_matrix:
            Required argument.
            Specifies the argument validation matrix, i.e. a list with one
            entry per argument to check. It is forwarded unchanged to
            _Validators._validate_function_arguments().
            Types: list

        setop_type:
            Required argument.
            Specifies the type of SET Operation to be performed. It is used
            in error messages and to decide whether the column counts of
            the DataFrames must match.
            Types: str

    RETURNS:
        None

    RAISES:
        TeradataMlException
            * SETOP_INVALID_DF_COUNT, if "df_list" holds fewer than two items.
            * INVALID_DF_LENGTH, if "setop_type" is 'td_intersect', 'td_minus'
              or 'td_except' and the DataFrames differ in number of columns.
        Any error raised by _Validators._validate_function_arguments().

    EXAMPLES:
        __validate_setop_args(df_list, awu_matrix, setop_type)
    """
    # Type (and permitted value) checks described by the caller's matrix.
    _Validators._validate_function_arguments(awu_matrix)

    # A set operation needs at least two operands.
    if len(df_list) < 2:
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_INVALID_DF_COUNT,
                                                       setop_type),
                                  MessageCodes.SETOP_INVALID_DF_COUNT)

    # Every operand must be a teradataml DataFrame; the position is part of
    # the argument name so the error points at the offending element.
    for position, operand in enumerate(df_list):
        _Validators._validate_function_arguments([['df_list[{0}]'.format(position), operand,
                                                   False, (dataframe.DataFrame)]])

    # 'td_intersect', 'td_minus' and 'td_except' need the same number of
    # columns in every operand.
    if setop_type in ('td_intersect', 'td_minus', 'td_except'):
        expected_count = len(df_list[0].columns)
        if any(len(operand.columns) != expected_count for operand in df_list[1:]):
            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_DF_LENGTH),
                                      MessageCodes.INVALID_DF_LENGTH)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def __check_concat_compatibility(df_list, join, sort, ignore_index):
    """
    DESCRIPTION:
        Internal function that works out which columns a concat of the given
        DataFrames projects, and whether the column types line up well enough
        for the result to be created lazily.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames to be concatenated.
            Type: list of teradataml DataFrames

        join:
            Required argument.
            Specifies the type of join to use in concat ('inner' or 'outer').
            The value is compared case-insensitively.
            Type: str

        sort:
            Required argument.
            Specifies a flag to determine whether the columns should be sorted
            by name while being projected. Only honoured for an outer join.
            Type: bool

        ignore_index:
            Required argument.
            Specifies whether to ignore the index columns in resulting DataFrame
            or not. When True, a column is treated as absent from every
            DataFrame that lists it in its index label.
            Types: bool

    RETURNS:
        A tuple of the following form:
            (master_column_dict, is_lazy)

        where master_column_dict is an OrderedDict keyed by the names of the
        columns to project:
            {
                '<col_name_1>' : {
                    'col_present' : [True, False],
                    'col_type': <type>,
                    'name': <compiled column name>
                },
                '<col_name_2>' : {
                    ...
                },
                ...
            }

        Each value holds:
            1. 'col_present': A list of booleans, the nth value telling whether
                              the column is present (True) or absent (False)
                              in the nth DataFrame.
            2. 'col_type': The type of the column in the first DataFrame the
                           column is present in.
            3. 'name': The result of compile() on the column expression, i.e.
                       the column name with quotes if required.

        and 'is_lazy' is a boolean which is False as soon as one projected
        column does not compare equal in type across the DataFrames it is
        present in.

    RAISES:
        TeradataMlException (DF_WITH_NO_COLUMNS), if no column is left to project.

    EXAMPLES:
        columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort, ignore_index)
    """
    join_kind = join.upper()

    # TODO: Add a set operation type compatibility matrix for use to make this operation completely lazy
    #       https://jira.td.teradata.com/jira/browse/ELE-1913
    projected = OrderedDict()
    is_lazy = True

    for current_df in df_list:
        for column in current_df._metaexpr.c:
            col_name = column.name

            # Processing is name based: a name that already made it into the
            # result does not have to be looked at again.
            if col_name in projected:
                continue

            # One flag per DataFrame telling whether it contributes this column.
            # With ignore_index, index columns count as absent.
            presence = []
            for candidate in df_list:
                if ignore_index and candidate.index and col_name in candidate._index_label:
                    presence.append(False)
                else:
                    presence.append(df_utils._check_column_exists(col_name, candidate.columns))

            if join_kind == 'INNER':
                # Inner join keeps only columns present in all DataFrames.
                if not all(presence):
                    continue

                types_seen = [candidate._metaexpr.t.c[col_name].type for candidate in df_list]
                chosen_type = types_seen[0]
                projected[col_name] = {'col_present': presence,
                                       'col_type': chosen_type,
                                       'name': column.compile()}

                # Any type that differs from the chosen one forces eager execution.
                if not all(seen == chosen_type for seen in types_seen):
                    is_lazy = False

            elif join_kind == 'OUTER':
                # A column flagged absent everywhere (possible with
                # ignore_index) is not projected at all.
                if not any(presence):
                    continue

                # None marks the DataFrames the column is missing from.
                types_seen = [candidate._metaexpr.t.c[col_name].type if found else None
                              for candidate, found in zip(df_list, presence)]
                # Type of the column in the first DataFrame it is present in.
                chosen_type = next(seen for seen in types_seen if seen is not None)
                projected[col_name] = {'col_present': presence,
                                       'col_type': chosen_type,
                                       'name': column.compile()}

                # Only the DataFrames that do have the column take part in
                # the type comparison.
                if not all(seen is None or seen == chosen_type for seen in types_seen):
                    is_lazy = False

    # Column order is only rearranged for an outer join.
    if sort and join_kind == 'OUTER':
        projected = OrderedDict(sorted(projected.items()))

    # Nothing to project means the result would have no data.
    if not projected:
        raise TeradataMlException(Messages.get_message(MessageCodes.DF_WITH_NO_COLUMNS),
                                  MessageCodes.DF_WITH_NO_COLUMNS)

    return projected, is_lazy
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def __check_setop_if_lazy(df_list):
    """
    DESCRIPTION:
        Internal function to check if the teradataml DataFrames column types are
        compatible for any set operation or not. Columns are matched by
        position: the nth column of the first DataFrame is compared with the
        nth column of every other DataFrame.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames.
            Types: list of teradataml DataFrames

    RETURNS:
        A boolean 'is_lazy' which indicates whether the result DataFrame creation
        should be a lazy operation or not. It is False as soon as one column of
        a later DataFrame has a type whose class differs from the class of the
        type of the column at the same position in the first DataFrame.

    RAISES:
        None

    EXAMPLES:
        is_lazy = __check_setop_if_lazy(df_list)
    """
    first_df = df_list[0]

    # The column collection and column names of the remaining DataFrames do not
    # change while scanning, so look them up once instead of once per column.
    others = [(other_df._metaexpr.t.c, other_df.columns) for other_df in df_list[1:]]

    # The first DataFrame's columns are the ones projected by the set operation.
    for position, first_col in enumerate(first_df._metaexpr.t.c):
        expected_class = type(first_col.type)
        for other_cols, other_names in others:
            # Only the classes of the types are compared, not their parameters.
            if type(other_cols[other_names[position]].type) != expected_class:
                # One mismatch settles the answer; no need to look further.
                return False

    return True
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, index_to_use, class_name = None):
    """
    DESCRIPTION:
        Internal function that turns the node of a set operation into the
        result DataFrame, either lazily from the node or by executing the node
        first.

    PARAMETERS:
        meta_data:
            Required argument.
            Specifies either a metaexpr for the first DataFrame or, for concat,
            a dictionary keyed by the column names to project. Each value of the
            dictionary is again a dictionary describing the column; its
            'col_type' entry is read here.
            Types: _MetaExpression, OrderedDict

        is_lazy:
            Required argument.
            Specifies a boolean based on the column type compatibility, indicating
            whether set operation is lazy or not.
            Types: bool

        setop_type:
            Required argument.
            Specifies the type of SET Operation to be performed.
            Types: str

        nodeid:
            Required argument.
            Specifies the node id of the set operation.

        index_label:
            Required argument.
            Specifies list of index columns for the lazily created DataFrame.
            Types: list

        index_to_use:
            Required argument.
            Specifies the index column(s) passed to from_table() when the node
            has to be executed.
            Types: list

        class_name:
            Optional argument.
            Specifies the name of the class ("DataFrame" or "GeoDataFrame") of
            the output dataframe. Ignored for concat, where it is derived from
            the column types.
            Default: None
            Types: String

    RETURNS:
        teradataml DataFrame or GeoDataFrame, depending on the class name in use.

    RAISES:
        TeradataMlException (SETOP_COL_TYPE_MISMATCH), if creating the DataFrame
        from the executed node fails with a TeradataMlException.

    EXAMPLES:
        >>> __process_operation(meta_data, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)
    """
    # concat is handled separately because its metadata is an OrderedDict
    # rather than a metaexpr.
    if setop_type == 'concat':
        column_info = [(col_name, meta_data[col_name]['col_type']) for col_name in meta_data]

        # A single geospatial column is enough to return a GeoDataFrame.
        has_geo_column = any(isinstance(col_type, (GEOMETRY, MBR, MBB))
                             for _, col_type in column_info)
        class_name = "GeoDataFrame" if has_geo_column else "DataFrame"

        if is_lazy:
            # Build the metaexpr of the result from the column list alone,
            # without going to the database.
            datalake = meta_data.datalake if isinstance(meta_data, _MetaExpression) else None
            meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info,
                                                              datalake=datalake)

    if is_lazy:
        return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)

    try:
        # Execute node and get table_name to build DataFrame on.
        table_name = df_utils._execute_node_return_db_object_name(nodeid)
        return getattr(module, class_name).from_table(table_name, index_label=index_to_use)
    except TeradataMlException as err:
        # A TeradataMlException at this point is reported as a failure to
        # create the DataFrame because of incompatible column types.
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_COL_TYPE_MISMATCH, setop_type),
                                  MessageCodes.SETOP_COL_TYPE_MISMATCH) from err
    except OperationalError:
        # Driver errors are passed on to the caller untouched.
        raise
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
@collect_queryband(queryband="concat")
def concat(df_list, join='OUTER', allow_duplicates=True, sort=False, ignore_index=False):
    """
    DESCRIPTION:
        Concatenates a list of teradataml DataFrames, GeoDataFrames, or both along the index axis.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies a list of teradataml DataFrames, GeoDataFrames, or both on which the
            concatenation is to be performed.
            Types: list of teradataml DataFrames and/or GeoDataFrames

        join:
            Optional argument.
            Specifies how to handle indexes on columns axis.
            Supported values are:
            • 'OUTER': It instructs the function to project all columns from all the DataFrames.
                       Columns not present in any DataFrame will have a SQL NULL value.
            • 'INNER': It instructs the function to project only the columns common to all DataFrames.
            Default value: 'OUTER'
            Permitted values: 'INNER', 'OUTER'
            Types: str

        allow_duplicates:
            Optional argument.
            Specifies if the result of concatenation can have duplicate rows.
            Default value: True
            Types: bool

        sort:
            Optional argument.
            Specifies a flag to sort the columns axis if it is not already aligned when
            the join argument is set to 'outer'.
            Default value: False
            Types: bool

        ignore_index:
            Optional argument.
            Specifies whether to ignore the index columns in resulting DataFrame or not.
            If True, then index columns will be ignored in the concat operation.
            Default value: False
            Types: bool

    RETURNS:
        teradataml DataFrame, if result does not contain any geometry data, otherwise returns teradataml GeoDataFrame.

    RAISES:
        TeradataMlException

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "admissions_train")
        >>> load_example_data("geodataframe", ["sample_shapes"])
        >>> from teradataml.dataframe import concat
        >>>
        >>> # Default options
        >>> df = DataFrame('admissions_train')
        >>> df1 = df[df.gpa == 4].select(['id', 'stats', 'masters', 'gpa'])
        >>> df1
               stats masters  gpa
        id
        13  Advanced      no  4.0
        29    Novice     yes  4.0
        15  Advanced     yes  4.0
        >>> df2 = df[df.gpa < 2].select(['id', 'stats', 'programming', 'admitted'])
        >>> df2
               stats programming admitted
        id
        24  Advanced      Novice        1
        19  Advanced    Advanced        0
        >>> cdf = concat([df1, df2])
        >>> cdf
               stats masters  gpa programming admitted
        id
        19  Advanced    None  NaN    Advanced        0
        24  Advanced    None  NaN      Novice        1
        13  Advanced      no  4.0        None     None
        29    Novice     yes  4.0        None     None
        15  Advanced     yes  4.0        None     None
        >>>
        >>> # concat more than two DataFrames
        >>> df3 = df[df.gpa == 3].select(['id', 'stats', 'programming', 'gpa'])
        >>> df3
               stats programming  gpa
        id
        36  Advanced      Novice  3.0
        >>> cdf = concat([df1, df2, df3])
        >>> cdf
               stats masters  gpa programming  admitted
        id
        15  Advanced     yes  4.0        None       NaN
        19  Advanced    None  NaN    Advanced       0.0
        36  Advanced    None  3.0      Novice       NaN
        29    Novice     yes  4.0        None       NaN
        13  Advanced      no  4.0        None       NaN
        24  Advanced    None  NaN      Novice       1.0

        >>> # join = 'inner'
        >>> cdf = concat([df1, df2], join='inner')
        >>> cdf
               stats
        id
        19  Advanced
        24  Advanced
        13  Advanced
        29    Novice
        15  Advanced
        >>>
        >>> # allow_duplicates = True (default)
        >>> cdf = concat([df1, df2])
        >>> cdf
               stats masters  gpa programming admitted
        id
        19  Advanced    None  NaN    Advanced        0
        24  Advanced    None  NaN      Novice        1
        13  Advanced      no  4.0        None     None
        29    Novice     yes  4.0        None     None
        15  Advanced     yes  4.0        None     None
        >>> cdf = concat([cdf, df2])
        >>> cdf
               stats masters  gpa programming admitted
        id
        19  Advanced    None  NaN    Advanced        0
        13  Advanced      no  4.0        None     None
        24  Advanced    None  NaN      Novice        1
        24  Advanced    None  NaN      Novice        1
        19  Advanced    None  NaN    Advanced        0
        29    Novice     yes  4.0        None     None
        15  Advanced     yes  4.0        None     None
        >>>
        >>> # allow_duplicates = False
        >>> cdf = concat([cdf, df2], allow_duplicates=False)
        >>> cdf
               stats masters  gpa programming admitted
        id
        19  Advanced    None  NaN    Advanced        0
        29    Novice     yes  4.0        None     None
        24  Advanced    None  NaN      Novice        1
        15  Advanced     yes  4.0        None     None
        13  Advanced      no  4.0        None     None
        >>>
        >>> # sort = True
        >>> cdf = concat([df1, df2], sort=True)
        >>> cdf
           admitted  gpa masters programming     stats
        id
        19        0  NaN    None    Advanced  Advanced
        24        1  NaN    None      Novice  Advanced
        13     None  4.0      no        None  Advanced
        29     None  4.0     yes        None    Novice
        15     None  4.0     yes        None  Advanced
        >>>
        >>> # ignore_index = True
        >>> cdf = concat([df1, df2], ignore_index=True)
        >>> cdf
              stats masters  gpa programming  admitted
        0  Advanced     yes  4.0        None       NaN
        1  Advanced    None  NaN    Advanced       0.0
        2    Novice     yes  4.0        None       NaN
        3  Advanced    None  NaN      Novice       1.0
        4  Advanced      no  4.0        None       NaN

        >>> # Perform concatenation of two GeoDataFrames
        >>> geo_dataframe = GeoDataFrame('sample_shapes')
        >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
        >>> geo_dataframe1

        skey  linestrings
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)

        >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','polygons'])
        >>> geo_dataframe2

        skey  polygons
        1009  MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
        1005  POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
        1004  POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
        1002  POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
        1001  POLYGON ((0 0,0 20,20 20,20 0,0 0))
        1003  POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
        1007  MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
        1006  POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
        1008  MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))


        >>> concat([geo_dataframe1,geo_dataframe2])

        skey  linestrings                              polygons
        1009  None                                     MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
        1005  None                                     POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)  None
        1004  None                                     POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))
        1003  None                                     POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
        1001  None                                     POLYGON ((0 0,0 20,20 20,20 0,0 0))
        1002  None                                     POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
        1007  None                                     MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
        1006  None                                     POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
        1008  None                                     MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))

        >>> # Perform concatenation of a DataFrame and GeoDataFrame which returns a GeoDataFrame.
        >>> normal_df=df.select(['id','stats'])
        >>> normal_df
               stats
        id
        34  Advanced
        32  Advanced
        11  Advanced
        40    Novice
        38  Advanced
        36  Advanced
        7     Novice
        26  Advanced
        19  Advanced
        13  Advanced
        >>> geo_df = geo_dataframe[geo_dataframe.skey < 1010].select(['skey', 'polygons'])
        >>> geo_df

        skey  polygons
        1003  POLYGON ((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8))
        1008  MULTIPOLYGON (((0 0,0 20,20 20,20 0,0 0)),((0.6 0.8,0.6 20.8,20.6 20.8,20.6 0.8,0.6 0.8)))
        1006  POLYGON ((0 0 0,0 0 20,0 20 0,0 20 20,20 0 0,20 0 20,20 20 0,20 20 20,0 0 0))
        1009  MULTIPOLYGON (((0 0 0,0 20 20,20 20 20,20 0 20,0 0 0)),((50 50 50,50 90 90,90 90 90,90 50 90,50 50 50)))
        1005  POLYGON ((0 0 0,0 0 20.435,0.0 20.435 0,0.0 20.435 20.435,20.435 0.0 0,20.435 0.0 20.435,20.435 20.435 0,20.435 20.435 20.435,0 0 0))
        1007  MULTIPOLYGON (((1 1,1 3,6 3,6 0,1 1)),((10 5,10 10,20 10,20 5,10 5)))
        1001  POLYGON ((0 0,0 20,20 20,20 0,0 0))
        1002  POLYGON ((0 0,0 20,20 20,20 0,0 0),(5 5,5 10,10 10,10 5,5 5))
        1004  POLYGON ((0 0 0,0 10 20,20 20 30,20 10 0,0 0 0),(5 5 5,5 10 10,10 10 10,10 10 5,5 5 5))

        >>> idf = concat([normal_df, geo_df])
        >>> idf
               stats  skey polygons
        id
        38  Advanced  None     None
        7     Novice  None     None
        26  Advanced  None     None
        17  Advanced  None     None
        34  Advanced  None     None
        13  Advanced  None     None
        32  Advanced  None     None
        11  Advanced  None     None
        15  Advanced  None     None
        36  Advanced  None     None
        >>>
    """
    setop_type = 'concat'
    concat_join_permitted_values = ['INNER', 'OUTER']

    # One validation entry per argument of this function.
    awu_matrix = [
        ["df_list", df_list, False, (list)],
        ["join", join, True, (str), True, concat_join_permitted_values],
        ["allow_duplicates", allow_duplicates, False, (bool)],
        ["sort", sort, False, (bool)],
        ["ignore_index", ignore_index, False, (bool)],
    ]

    # Validate Set operator arguments
    __validate_setop_args(df_list, awu_matrix, setop_type)

    # Columns to project (with their types) and whether the result can be lazy.
    master_columns_dict, is_lazy = __check_concat_compatibility(df_list, join, sort, ignore_index)

    try:
        aed_utils = AedUtils()

        # The index comes from the first DataFrame that has at least one of its
        # index columns projected; only the projected ones are kept as label.
        # Both stay None when no DataFrame qualifies.
        index_label = None
        index_to_use = None
        for df in df_list:
            if df._index_label is not None and any(ind_col in master_columns_dict for ind_col in df._index_label):
                index_to_use = df._index_label
                index_label = [ind_col for ind_col in index_to_use if ind_col in master_columns_dict]
                break

        # With 'ignore_index' the result carries no index information at all.
        if ignore_index and index_to_use is not None:
            index_label = None
            index_to_use = None

        type_compiler = td_type_compiler(td_dialect)

        # One projection list per input DataFrame, filled column by column.
        col_list = [[] for _ in df_list]
        for col_name, value in master_columns_dict.items():
            for projection, is_present in zip(col_list, value['col_present']):
                # Quoting is already done for column names that start with a
                # number or are reserved keywords; the name is checked once
                # more against the Teradata keywords here.
                column_name = UtilFuncs._process_for_teradata_keyword(value['name'])
                if is_present:
                    projection.append(column_name)
                else:
                    # A DataFrame without the column contributes a typed NULL.
                    projection.append('CAST(NULL as {}) as {}'.format(type_compiler.process(value['col_type']), column_name))

        input_table_columns = [','.join(projection) for projection in col_list]

        concat_nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
                                             'unionall' if allow_duplicates else 'union',
                                             input_table_columns)
        return __process_operation(master_columns_dict, is_lazy, setop_type, concat_nodeid, index_label, index_to_use)

    except TeradataMlException:
        # Already a teradataml error: pass it on unchanged.
        raise
    except Exception as err:
        # Anything else is reported as a failed set operation.
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
                                  MessageCodes.SETOP_FAILED) from err
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
@collect_queryband(queryband="tdIntersect")
def td_intersect(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        Intersects a list of teradataml DataFrames or GeoDataFrames along the index axis
        and returns a DataFrame containing the rows common to all input DataFrames.
        Note:
            All the inputs must be of the same type: either all teradataml DataFrames,
            or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on which the
            intersection is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies if the result of intersection can have duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when intersect is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> load_example_data("geodataframe", ["sample_shapes"])
        >>> from teradataml.dataframe.setop import td_intersect
        >>>
        >>> df1 = DataFrame('setop_test1')
        >>> df1
            masters   gpa     stats  programming  admitted
        id
        62       no  3.70  Advanced     Advanced         1
        53      yes  3.50  Beginner       Novice         1
        69       no  3.96  Advanced     Advanced         1
        61      yes  4.00  Advanced     Advanced         1
        58       no  3.13  Advanced     Advanced         1
        51      yes  3.76  Beginner     Beginner         0
        68       no  1.87  Advanced       Novice         1
        66       no  3.87    Novice     Beginner         1
        60       no  4.00  Advanced       Novice         1
        59       no  3.65    Novice       Novice         1
        >>> df2 = DataFrame('setop_test2')
        >>> df2
            masters   gpa     stats  programming  admitted
        id
        12       no  3.65    Novice       Novice         1
        15      yes  4.00  Advanced     Advanced         1
        14      yes  3.45  Advanced     Advanced         0
        20      yes  3.90  Advanced     Advanced         1
        18      yes  3.81  Advanced     Advanced         1
        17       no  3.83  Advanced     Advanced         1
        13       no  4.00  Advanced       Novice         1
        11       no  3.13  Advanced     Advanced         1
        60       no  4.00  Advanced       Novice         1
        19      yes  1.98  Advanced     Advanced         0
        >>> idf = td_intersect([df1, df2])
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        64      yes  3.81  Advanced     Advanced         1
        60       no  4.00  Advanced       Novice         1
        58       no  3.13  Advanced     Advanced         1
        68       no  1.87  Advanced       Novice         1
        66       no  3.87    Novice     Beginner         1
        60       no  4.00  Advanced       Novice         1
        62       no  3.70  Advanced     Advanced         1
        >>>
        >>> idf = td_intersect([df1, df2], allow_duplicates=False)
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        64      yes  3.81  Advanced     Advanced         1
        60       no  4.00  Advanced       Novice         1
        58       no  3.13  Advanced     Advanced         1
        68       no  1.87  Advanced       Novice         1
        66       no  3.87    Novice     Beginner         1
        62       no  3.70  Advanced     Advanced         1
        >>> # intersecting more than two DataFrames
        >>> df3 = df1[df1.gpa <= 3.5]
        >>> df3
            masters   gpa     stats  programming  admitted
        id
        58       no  3.13  Advanced     Advanced         1
        67      yes  3.46    Novice     Beginner         0
        54      yes  3.50  Beginner     Advanced         1
        68       no  1.87  Advanced       Novice         1
        53      yes  3.50  Beginner       Novice         1
        >>> idf = td_intersect([df1, df2, df3])
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        58       no  3.13  Advanced     Advanced         1
        68       no  1.87  Advanced       Novice         1

        >>> # Perform intersection of two GeoDataFrames.
        >>> geo_dataframe = GeoDataFrame('sample_shapes')
        >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
        >>> geo_dataframe1
        skey  linestrings
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)
        >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
        >>> geo_dataframe2
        skey  linestrings
        1009  MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
        1005  LINESTRING (1 3 6,3 0 6,6 0 1)
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)
        1002  LINESTRING (1 3,3 0,0 1)
        1001  LINESTRING (1 1,2 2,3 3,4 4)
        1003  LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
        1007  MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
        1006  LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
        1008  MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
        >>> td_intersect([geo_dataframe1,geo_dataframe2])
        skey  linestrings
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)
    """
    setop_type = 'td_intersect'
    operation = 'intersect'

    # One row per public argument, in the layout __validate_setop_args consumes.
    awu_matrix = [
        ["df_list", df_list, False, (list)],
        ["allow_duplicates", allow_duplicates, False, (bool)],
    ]

    # Validate Set operator arguments
    __validate_setop_args(df_list, awu_matrix, setop_type)

    # The shared helper builds the set-operation node and wraps the result.
    return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
@collect_queryband(queryband="tdMinus")
def td_minus(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        This function returns the resulting rows that appear in first teradataml DataFrame or GeoDataFrame
        and not in other teradataml DataFrames or GeoDataFrames along the index axis.
        Note:
            This function should be applied to data frames of the same type: either all teradataml DataFrames,
            or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on which the minus
            operation is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies if the result of minus operation can have duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when operation is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> load_example_data("geodataframe", ["sample_shapes"])
        >>> from teradataml.dataframe.setop import td_minus
        >>>
        >>> df1 = DataFrame('setop_test1')
        >>> df1
            masters   gpa     stats  programming  admitted
        id
        62       no  3.70  Advanced     Advanced         1
        53      yes  3.50  Beginner       Novice         1
        69       no  3.96  Advanced     Advanced         1
        61      yes  4.00  Advanced     Advanced         1
        58       no  3.13  Advanced     Advanced         1
        51      yes  3.76  Beginner     Beginner         0
        68       no  1.87  Advanced       Novice         1
        66       no  3.87    Novice     Beginner         1
        60       no  4.00  Advanced       Novice         1
        59       no  3.65    Novice       Novice         1
        >>> df2 = DataFrame('setop_test2')
        >>> df2
            masters   gpa     stats  programming  admitted
        id
        12       no  3.65    Novice       Novice         1
        15      yes  4.00  Advanced     Advanced         1
        14      yes  3.45  Advanced     Advanced         0
        20      yes  3.90  Advanced     Advanced         1
        18      yes  3.81  Advanced     Advanced         1
        17       no  3.83  Advanced     Advanced         1
        13       no  4.00  Advanced       Novice         1
        11       no  3.13  Advanced     Advanced         1
        60       no  4.00  Advanced       Novice         1
        19      yes  1.98  Advanced     Advanced         0
        >>> idf = td_minus([df1[df1.id<55] , df2])
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        51      yes  3.76  Beginner     Beginner         0
        50      yes  3.95  Beginner     Beginner         0
        54      yes  3.50  Beginner     Advanced         1
        52       no  3.70    Novice     Beginner         1
        53      yes  3.50  Beginner       Novice         1
        53      yes  3.50  Beginner       Novice         1
        >>>
        >>> idf = td_minus([df1[df1.id<55] , df2], allow_duplicates=False)
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        54      yes  3.50  Beginner     Advanced         1
        51      yes  3.76  Beginner     Beginner         0
        53      yes  3.50  Beginner       Novice         1
        50      yes  3.95  Beginner     Beginner         0
        52       no  3.70    Novice     Beginner         1
        >>> # applying minus on more than two DataFrames
        >>> df3 = df1[df1.gpa <= 3.9]
        >>> idf = td_minus([df1, df2, df3])
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        61      yes  4.00  Advanced     Advanced         1
        50      yes  3.95  Beginner     Beginner         0
        69       no  3.96  Advanced     Advanced         1

        >>> # td_minus on GeoDataFrame
        >>> geo_dataframe = GeoDataFrame('sample_shapes')
        >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
        >>> geo_dataframe1
        skey  linestrings
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)

        >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
        >>> geo_dataframe2
        skey  linestrings
        1009  MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
        1005  LINESTRING (1 3 6,3 0 6,6 0 1)
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)
        1002  LINESTRING (1 3,3 0,0 1)
        1001  LINESTRING (1 1,2 2,3 3,4 4)
        1003  LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
        1007  MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
        1006  LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
        1008  MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))

        >>> td_minus([geo_dataframe2,geo_dataframe1])
              linestrings
        skey
        1005  LINESTRING (1 3 6,3 0 6,6 0 1)
        1009  MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
        1002  LINESTRING (1 3,3 0,0 1)
        1007  MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
        1008  MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
        1006  LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
        1003  LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
        1001  LINESTRING (1 1,2 2,3 3,4 4)
    """
    awu_matrix = []
    awu_matrix.append(["df_list", df_list, False, (list)])
    awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])

    # td_except() delegates to this function, so report the operation under the
    # name the user actually called. The caller is identified by the function
    # name found three frames above this one.
    # inspect.stack() walks the whole call stack, so take a single snapshot
    # instead of evaluating it twice, and tolerate a stack with fewer than
    # four frames rather than raising IndexError.
    call_stack = inspect.stack()
    try:
        caller_name = call_stack[3].function if len(call_stack) > 3 else None
    finally:
        # The snapshot references this very frame; drop it right away so no
        # frame <-> local-variable reference cycle outlives the call.
        del call_stack
    setop_type = 'td_except' if caller_name == 'td_except' else 'td_minus'
    operation = 'minus'

    # Validate Set operator arguments
    __validate_setop_args(df_list, awu_matrix, setop_type)

    return __process_setop_operation(df_list, allow_duplicates, setop_type, operation)
|
|
965
|
+
|
|
966
|
+
|
|
967
|
+
@collect_queryband(queryband="tdExcept")
def td_except(df_list, allow_duplicates=True):
    """
    DESCRIPTION:
        This function returns the resulting rows that appear in first teradataml DataFrame or GeoDataFrame
        and not in other teradataml DataFrames or GeoDataFrames along the index axis.
        Note:
            This function should be applied to data frames of the same type: either all teradataml DataFrames,
            or all GeoDataFrames.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames or GeoDataFrames on which the except
            operation is to be performed.
            Types: list of teradataml DataFrames or GeoDataFrames

        allow_duplicates:
            Optional argument.
            Specifies if the result of except operation can have duplicate rows.
            Default value: True
            Types: bool

    RETURNS:
        teradataml DataFrame when operation is performed on teradataml DataFrames.
        teradataml GeoDataFrame when operation is performed on teradataml GeoDataFrames.

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        >>> from teradataml import load_example_data
        >>> load_example_data("dataframe", "setop_test1")
        >>> load_example_data("dataframe", "setop_test2")
        >>> load_example_data("geodataframe", ["sample_shapes"])
        >>> from teradataml.dataframe.setop import td_except
        >>>
        >>> df1 = DataFrame('setop_test1')
        >>> df1
            masters   gpa     stats  programming  admitted
        id
        62       no  3.70  Advanced     Advanced         1
        53      yes  3.50  Beginner       Novice         1
        69       no  3.96  Advanced     Advanced         1
        61      yes  4.00  Advanced     Advanced         1
        58       no  3.13  Advanced     Advanced         1
        51      yes  3.76  Beginner     Beginner         0
        68       no  1.87  Advanced       Novice         1
        66       no  3.87    Novice     Beginner         1
        60       no  4.00  Advanced       Novice         1
        59       no  3.65    Novice       Novice         1
        >>> df2 = DataFrame('setop_test2')
        >>> df2
            masters   gpa     stats  programming  admitted
        id
        12       no  3.65    Novice       Novice         1
        15      yes  4.00  Advanced     Advanced         1
        14      yes  3.45  Advanced     Advanced         0
        20      yes  3.90  Advanced     Advanced         1
        18      yes  3.81  Advanced     Advanced         1
        17       no  3.83  Advanced     Advanced         1
        13       no  4.00  Advanced       Novice         1
        11       no  3.13  Advanced     Advanced         1
        60       no  4.00  Advanced       Novice         1
        19      yes  1.98  Advanced     Advanced         0
        >>> idf = td_except([df1[df1.id<55] , df2])
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        51      yes  3.76  Beginner     Beginner         0
        50      yes  3.95  Beginner     Beginner         0
        54      yes  3.50  Beginner     Advanced         1
        52       no  3.70    Novice     Beginner         1
        53      yes  3.50  Beginner       Novice         1
        53      yes  3.50  Beginner       Novice         1
        >>>
        >>> idf = td_except([df1[df1.id<55] , df2], allow_duplicates=False)
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        54      yes  3.50  Beginner     Advanced         1
        51      yes  3.76  Beginner     Beginner         0
        53      yes  3.50  Beginner       Novice         1
        50      yes  3.95  Beginner     Beginner         0
        52       no  3.70    Novice     Beginner         1
        >>> # applying except on more than two DataFrames
        >>> df3 = df1[df1.gpa <= 3.9]
        >>> idf = td_except([df1, df2, df3])
        >>> idf
            masters   gpa     stats  programming  admitted
        id
        61      yes  4.00  Advanced     Advanced         1
        50      yes  3.95  Beginner     Beginner         0
        69       no  3.96  Advanced     Advanced         1

        >>> # td_except on GeoDataFrames
        >>> geo_dataframe = GeoDataFrame('sample_shapes')
        >>> geo_dataframe1 = geo_dataframe[geo_dataframe.skey == 1004].select(['skey','linestrings'])
        >>> geo_dataframe1
        skey  linestrings
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)

        >>> geo_dataframe2 = geo_dataframe[geo_dataframe.skey < 1010].select(['skey','linestrings'])
        >>> geo_dataframe2
        skey  linestrings
        1009  MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
        1005  LINESTRING (1 3 6,3 0 6,6 0 1)
        1004  LINESTRING (10 20 30,40 50 60,70 80 80)
        1002  LINESTRING (1 3,3 0,0 1)
        1001  LINESTRING (1 1,2 2,3 3,4 4)
        1003  LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
        1007  MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
        1006  LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
        1008  MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))

        >>> td_except([geo_dataframe2,geo_dataframe1])
        skey  linestrings
        1008  MULTILINESTRING ((1 3,3 0,0 1),(1.35 3.6456,3.6756 0.23,0.345 1.756))
        1003  LINESTRING (1.35 3.6456,3.6756 0.23,0.345 1.756)
        1005  LINESTRING (1 3 6,3 0 6,6 0 1)
        1006  LINESTRING (1.35 3.6456 4.5,3.6756 0.23 6.8,0.345 1.756 8.9)
        1009  MULTILINESTRING ((10 20 30,40 50 60),(70 80 80,90 100 110))
        1001  LINESTRING (1 1,2 2,3 3,4 4)
        1007  MULTILINESTRING ((1 1,1 3,6 3),(10 5,20 1))
        1002  LINESTRING (1 3,3 0,0 1)
    """
    # td_minus() inspects the call stack at a fixed depth to detect that it was
    # reached through td_except, and then reports the operation as 'td_except'.
    # Keep this a direct call: adding an intermediate function between the two
    # would shift the frames and make td_minus report 'td_minus' instead.
    return td_minus(df_list, allow_duplicates)
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
def __process_setop_operation(df_list, allow_duplicates, setop_type, operation):
    """
    DESCRIPTION:
        Internal function to process a set operation and return the result
        DataFrame/GeoDataFrame.

    PARAMETERS:
        df_list:
            Required argument.
            Specifies the list of teradataml DataFrames/GeoDataFrames on which the
            set operation is to be performed.
            Types: list of teradataml DataFrames/GeoDataFrames

        allow_duplicates:
            Required argument.
            Specifies if the result of the set operation can have duplicate rows.
            When True, "all" is appended to "operation" to form the operator name.
            Types: bool

        setop_type:
            Required argument.
            Specifies the name of the calling set operation function. It is passed on
            to the result builder and used in the error message if the operation fails.
            Types: str

        operation:
            Required argument.
            Specifies the set operation name used to build the operator.
            Types: str

    RETURNS:
        teradataml DataFrame/GeoDataFrame

    RAISES:
        TeradataMlException

    EXAMPLES:
        >>> __process_setop_operation(df_list, allow_duplicates, setop_type, operation)

    """

    # Check if set operation can be lazy or not
    is_lazy = __check_setop_if_lazy(df_list)

    # The first DataFrame supplies the metaexpr, index label and result class.
    first_df = df_list[0]
    first_df_metaexpr = first_df._metaexpr

    try:
        aed_utils = AedUtils()

        # One comma-separated column list per input DataFrame. Each name goes
        # through the Teradata keyword handling, and "columns" is read once per
        # DataFrame rather than once per column.
        input_table_columns = [
            ','.join(UtilFuncs._process_for_teradata_keyword(column) for column in df.columns)
            for df in df_list
        ]

        set_operator = '{}all'.format(operation) if allow_duplicates else '{}'.format(operation)
        nodeid = aed_utils._aed_setop([df._nodeid for df in df_list],
                                      set_operator,
                                      input_table_columns)

        # Use the first DataFrame's index_label when it has one; otherwise
        # index_to_use stays None and index_label is an empty list.
        index_to_use = first_df._index_label
        index_label = index_to_use if index_to_use is not None else []

        class_name = first_df.__class__.__name__
        return __process_operation(first_df_metaexpr, is_lazy, setop_type, nodeid,
                                   index_label, index_to_use, class_name)

    except TeradataMlException:
        raise
    except Exception as err:
        raise TeradataMlException(Messages.get_message(MessageCodes.SETOP_FAILED, setop_type),
                                  MessageCodes.SETOP_FAILED) from err
|