teradataml-20.0.0.8-py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
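Since every entry below shows only added lines (+N -0), the listing is effectively the complete file manifest of this release. As a quick sanity check that an installed build matches it, here is a minimal sketch — assuming only that the package exposes `__version__` (suggested by the `teradataml/_version.py` entry in the manifest; not confirmed by this diff):

```python
# Minimal sketch: verify an installed build against this release.
# Assumption: teradataml exposes __version__, as hinted by the
# teradataml/_version.py file listed in the manifest below.
import teradataml

print("installed teradataml version:", teradataml.__version__)
assert teradataml.__version__ == "20.0.0.8", teradataml.__version__
```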
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +2762 -0
- teradataml/__init__.py +78 -0
- teradataml/_version.py +11 -0
- teradataml/analytics/Transformations.py +2996 -0
- teradataml/analytics/__init__.py +82 -0
- teradataml/analytics/analytic_function_executor.py +2416 -0
- teradataml/analytics/analytic_query_generator.py +1050 -0
- teradataml/analytics/byom/H2OPredict.py +514 -0
- teradataml/analytics/byom/PMMLPredict.py +437 -0
- teradataml/analytics/byom/__init__.py +16 -0
- teradataml/analytics/json_parser/__init__.py +133 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
- teradataml/analytics/json_parser/json_store.py +191 -0
- teradataml/analytics/json_parser/metadata.py +1666 -0
- teradataml/analytics/json_parser/utils.py +805 -0
- teradataml/analytics/meta_class.py +236 -0
- teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
- teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
- teradataml/analytics/sqle/__init__.py +128 -0
- teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
- teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
- teradataml/analytics/table_operator/__init__.py +11 -0
- teradataml/analytics/uaf/__init__.py +82 -0
- teradataml/analytics/utils.py +828 -0
- teradataml/analytics/valib.py +1617 -0
- teradataml/automl/__init__.py +5835 -0
- teradataml/automl/autodataprep/__init__.py +493 -0
- teradataml/automl/custom_json_utils.py +1625 -0
- teradataml/automl/data_preparation.py +1384 -0
- teradataml/automl/data_transformation.py +1254 -0
- teradataml/automl/feature_engineering.py +2273 -0
- teradataml/automl/feature_exploration.py +1873 -0
- teradataml/automl/model_evaluation.py +488 -0
- teradataml/automl/model_training.py +1407 -0
- teradataml/catalog/__init__.py +2 -0
- teradataml/catalog/byom.py +1759 -0
- teradataml/catalog/function_argument_mapper.py +859 -0
- teradataml/catalog/model_cataloging_utils.py +491 -0
- teradataml/clients/__init__.py +0 -0
- teradataml/clients/auth_client.py +137 -0
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/clients/pkce_client.py +481 -0
- teradataml/common/__init__.py +1 -0
- teradataml/common/aed_utils.py +2078 -0
- teradataml/common/bulk_exposed_utils.py +113 -0
- teradataml/common/constants.py +1669 -0
- teradataml/common/deprecations.py +166 -0
- teradataml/common/exceptions.py +147 -0
- teradataml/common/formula.py +743 -0
- teradataml/common/garbagecollector.py +666 -0
- teradataml/common/logger.py +1261 -0
- teradataml/common/messagecodes.py +518 -0
- teradataml/common/messages.py +262 -0
- teradataml/common/pylogger.py +67 -0
- teradataml/common/sqlbundle.py +764 -0
- teradataml/common/td_coltype_code_to_tdtype.py +48 -0
- teradataml/common/utils.py +3166 -0
- teradataml/common/warnings.py +36 -0
- teradataml/common/wrapper_utils.py +625 -0
- teradataml/config/__init__.py +0 -0
- teradataml/config/dummy_file1.cfg +5 -0
- teradataml/config/dummy_file2.cfg +3 -0
- teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
- teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
- teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
- teradataml/context/__init__.py +0 -0
- teradataml/context/aed_context.py +223 -0
- teradataml/context/context.py +1462 -0
- teradataml/data/A_loan.csv +19 -0
- teradataml/data/BINARY_REALS_LEFT.csv +11 -0
- teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
- teradataml/data/B_loan.csv +49 -0
- teradataml/data/BuoyData2.csv +17 -0
- teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
- teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
- teradataml/data/Convolve2RealsLeft.csv +5 -0
- teradataml/data/Convolve2RealsRight.csv +5 -0
- teradataml/data/Convolve2ValidLeft.csv +11 -0
- teradataml/data/Convolve2ValidRight.csv +11 -0
- teradataml/data/DFFTConv_Real_8_8.csv +65 -0
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/Mall_customer_data.csv +201 -0
- teradataml/data/Orders1_12mf.csv +25 -0
- teradataml/data/Pi_loan.csv +7 -0
- teradataml/data/SMOOTHED_DATA.csv +7 -0
- teradataml/data/TestDFFT8.csv +9 -0
- teradataml/data/TestRiver.csv +109 -0
- teradataml/data/Traindata.csv +28 -0
- teradataml/data/__init__.py +0 -0
- teradataml/data/acf.csv +17 -0
- teradataml/data/adaboost_example.json +34 -0
- teradataml/data/adaboostpredict_example.json +24 -0
- teradataml/data/additional_table.csv +11 -0
- teradataml/data/admissions_test.csv +21 -0
- teradataml/data/admissions_train.csv +41 -0
- teradataml/data/admissions_train_nulls.csv +41 -0
- teradataml/data/advertising.csv +201 -0
- teradataml/data/ageandheight.csv +13 -0
- teradataml/data/ageandpressure.csv +31 -0
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/antiselect_example.json +36 -0
- teradataml/data/antiselect_input.csv +8 -0
- teradataml/data/antiselect_input_mixed_case.csv +8 -0
- teradataml/data/applicant_external.csv +7 -0
- teradataml/data/applicant_reference.csv +7 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/arima_example.json +9 -0
- teradataml/data/assortedtext_input.csv +8 -0
- teradataml/data/attribution_example.json +34 -0
- teradataml/data/attribution_sample_table.csv +27 -0
- teradataml/data/attribution_sample_table1.csv +6 -0
- teradataml/data/attribution_sample_table2.csv +11 -0
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bank_web_clicks1.csv +43 -0
- teradataml/data/bank_web_clicks2.csv +91 -0
- teradataml/data/bank_web_url.csv +85 -0
- teradataml/data/barrier.csv +2 -0
- teradataml/data/barrier_new.csv +3 -0
- teradataml/data/betweenness_example.json +14 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/bin_breaks.csv +8 -0
- teradataml/data/bin_fit_ip.csv +4 -0
- teradataml/data/binary_complex_left.csv +11 -0
- teradataml/data/binary_complex_right.csv +11 -0
- teradataml/data/binary_matrix_complex_left.csv +21 -0
- teradataml/data/binary_matrix_complex_right.csv +21 -0
- teradataml/data/binary_matrix_real_left.csv +21 -0
- teradataml/data/binary_matrix_real_right.csv +21 -0
- teradataml/data/blood2ageandweight.csv +26 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/boston.csv +507 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/buoydata_mix.csv +11 -0
- teradataml/data/burst_data.csv +5 -0
- teradataml/data/burst_example.json +21 -0
- teradataml/data/byom_example.json +34 -0
- teradataml/data/bytes_table.csv +4 -0
- teradataml/data/cal_housing_ex_raw.csv +70 -0
- teradataml/data/callers.csv +7 -0
- teradataml/data/calls.csv +10 -0
- teradataml/data/cars_hist.csv +33 -0
- teradataml/data/cat_table.csv +25 -0
- teradataml/data/ccm_example.json +32 -0
- teradataml/data/ccm_input.csv +91 -0
- teradataml/data/ccm_input2.csv +13 -0
- teradataml/data/ccmexample.csv +101 -0
- teradataml/data/ccmprepare_example.json +9 -0
- teradataml/data/ccmprepare_input.csv +91 -0
- teradataml/data/cfilter_example.json +12 -0
- teradataml/data/changepointdetection_example.json +18 -0
- teradataml/data/changepointdetectionrt_example.json +8 -0
- teradataml/data/chi_sq.csv +3 -0
- teradataml/data/churn_data.csv +14 -0
- teradataml/data/churn_emission.csv +35 -0
- teradataml/data/churn_initial.csv +3 -0
- teradataml/data/churn_state_transition.csv +5 -0
- teradataml/data/citedges_2.csv +745 -0
- teradataml/data/citvertices_2.csv +1210 -0
- teradataml/data/clicks2.csv +16 -0
- teradataml/data/clickstream.csv +13 -0
- teradataml/data/clickstream1.csv +11 -0
- teradataml/data/closeness_example.json +16 -0
- teradataml/data/complaints.csv +21 -0
- teradataml/data/complaints_mini.csv +3 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_testtoken.csv +224 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/complaints_tokens_test.csv +353 -0
- teradataml/data/complaints_traintoken.csv +472 -0
- teradataml/data/computers_category.csv +1001 -0
- teradataml/data/computers_test1.csv +1252 -0
- teradataml/data/computers_train1.csv +5009 -0
- teradataml/data/computers_train1_clustered.csv +5009 -0
- teradataml/data/confusionmatrix_example.json +9 -0
- teradataml/data/conversion_event_table.csv +3 -0
- teradataml/data/corr_input.csv +17 -0
- teradataml/data/correlation_example.json +11 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/coxhazardratio_example.json +39 -0
- teradataml/data/coxph_example.json +15 -0
- teradataml/data/coxsurvival_example.json +28 -0
- teradataml/data/cpt.csv +41 -0
- teradataml/data/credit_ex_merged.csv +45 -0
- teradataml/data/creditcard_data.csv +1001 -0
- teradataml/data/customer_loyalty.csv +301 -0
- teradataml/data/customer_loyalty_newseq.csv +31 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +173 -0
- teradataml/data/decisionforest_example.json +37 -0
- teradataml/data/decisionforestpredict_example.json +38 -0
- teradataml/data/decisiontree_example.json +21 -0
- teradataml/data/decisiontreepredict_example.json +45 -0
- teradataml/data/dfft2_size4_real.csv +17 -0
- teradataml/data/dfft2_test_matrix16.csv +17 -0
- teradataml/data/dfft2conv_real_4_4.csv +65 -0
- teradataml/data/diabetes.csv +443 -0
- teradataml/data/diabetes_test.csv +89 -0
- teradataml/data/dict_table.csv +5 -0
- teradataml/data/docperterm_table.csv +4 -0
- teradataml/data/docs/__init__.py +1 -0
- teradataml/data/docs/byom/__init__.py +0 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
- teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
- teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
- teradataml/data/docs/byom/docs/__init__.py +0 -0
- teradataml/data/docs/sqle/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
- teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
- teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
- teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
- teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
- teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
- teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
- teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
- teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
- teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
- teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
- teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
- teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
- teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
- teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
- teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
- teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
- teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
- teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
- teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
- teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
- teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
- teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/tableoperator/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
- teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
- teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
- teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/uaf/__init__.py +0 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
- teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
- teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
- teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
- teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
- teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
- teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
- teradataml/data/dtw_example.json +18 -0
- teradataml/data/dtw_t1.csv +11 -0
- teradataml/data/dtw_t2.csv +4 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt2d_example.json +16 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_example.json +15 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/dwt_filter_dim.csv +5 -0
- teradataml/data/emission.csv +9 -0
- teradataml/data/emp_table_by_dept.csv +19 -0
- teradataml/data/employee_info.csv +4 -0
- teradataml/data/employee_table.csv +6 -0
- teradataml/data/excluding_event_table.csv +2 -0
- teradataml/data/finance_data.csv +6 -0
- teradataml/data/finance_data2.csv +61 -0
- teradataml/data/finance_data3.csv +93 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/fish.csv +160 -0
- teradataml/data/fm_blood2ageandweight.csv +26 -0
- teradataml/data/fmeasure_example.json +12 -0
- teradataml/data/followers_leaders.csv +10 -0
- teradataml/data/fpgrowth_example.json +12 -0
- teradataml/data/frequentpaths_example.json +29 -0
- teradataml/data/friends.csv +9 -0
- teradataml/data/fs_input.csv +33 -0
- teradataml/data/fs_input1.csv +33 -0
- teradataml/data/genData.csv +513 -0
- teradataml/data/geodataframe_example.json +40 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/glm_admissions_model.csv +12 -0
- teradataml/data/glm_example.json +56 -0
- teradataml/data/glml1l2_example.json +28 -0
- teradataml/data/glml1l2predict_example.json +54 -0
- teradataml/data/glmpredict_example.json +54 -0
- teradataml/data/gq_t1.csv +21 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/hconvolve_complex_right.csv +5 -0
- teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
- teradataml/data/histogram_example.json +12 -0
- teradataml/data/hmmdecoder_example.json +79 -0
- teradataml/data/hmmevaluator_example.json +25 -0
- teradataml/data/hmmsupervised_example.json +10 -0
- teradataml/data/hmmunsupervised_example.json +8 -0
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/house_values.csv +12 -0
- teradataml/data/house_values2.csv +13 -0
- teradataml/data/housing_cat.csv +7 -0
- teradataml/data/housing_data.csv +9 -0
- teradataml/data/housing_test.csv +47 -0
- teradataml/data/housing_test_binary.csv +47 -0
- teradataml/data/housing_train.csv +493 -0
- teradataml/data/housing_train_attribute.csv +5 -0
- teradataml/data/housing_train_binary.csv +437 -0
- teradataml/data/housing_train_parameter.csv +2 -0
- teradataml/data/housing_train_response.csv +493 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/ibm_stock.csv +370 -0
- teradataml/data/ibm_stock1.csv +370 -0
- teradataml/data/identitymatch_example.json +22 -0
- teradataml/data/idf_table.csv +4 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/impressions.csv +101 -0
- teradataml/data/inflation.csv +21 -0
- teradataml/data/initial.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/insect_sprays.csv +13 -0
- teradataml/data/insurance.csv +1339 -0
- teradataml/data/interpolator_example.json +13 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/iris_altinput.csv +481 -0
- teradataml/data/iris_attribute_output.csv +8 -0
- teradataml/data/iris_attribute_test.csv +121 -0
- teradataml/data/iris_attribute_train.csv +481 -0
- teradataml/data/iris_category_expect_predict.csv +31 -0
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/iris_input.csv +151 -0
- teradataml/data/iris_response_train.csv +121 -0
- teradataml/data/iris_test.csv +31 -0
- teradataml/data/iris_train.csv +121 -0
- teradataml/data/join_table1.csv +4 -0
- teradataml/data/join_table2.csv +4 -0
- teradataml/data/jsons/anly_function_name.json +7 -0
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/byom/dataikupredict.json +148 -0
- teradataml/data/jsons/byom/datarobotpredict.json +147 -0
- teradataml/data/jsons/byom/h2opredict.json +195 -0
- teradataml/data/jsons/byom/onnxembeddings.json +267 -0
- teradataml/data/jsons/byom/onnxpredict.json +187 -0
- teradataml/data/jsons/byom/pmmlpredict.json +147 -0
- teradataml/data/jsons/paired_functions.json +450 -0
- teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
- teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
- teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
- teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
- teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
- teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
- teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
- teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
- teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
- teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
- teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
- teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
- teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
- teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
- teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
- teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
- teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
- teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
- teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
- teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
- teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
- teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
- teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
- teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
- teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
- teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
- teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
- teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
- teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/kmeans_example.json +23 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/kmeans_us_arrests_data.csv +51 -0
- teradataml/data/knn_example.json +19 -0
- teradataml/data/knnrecommender_example.json +7 -0
- teradataml/data/knnrecommenderpredict_example.json +12 -0
- teradataml/data/lar_example.json +17 -0
- teradataml/data/larpredict_example.json +30 -0
- teradataml/data/lc_new_predictors.csv +5 -0
- teradataml/data/lc_new_reference.csv +9 -0
- teradataml/data/lda_example.json +9 -0
- teradataml/data/ldainference_example.json +15 -0
- teradataml/data/ldatopicsummary_example.json +9 -0
- teradataml/data/levendist_input.csv +13 -0
- teradataml/data/levenshteindistance_example.json +10 -0
- teradataml/data/linreg_example.json +10 -0
- teradataml/data/load_example_data.py +350 -0
- teradataml/data/loan_prediction.csv +295 -0
- teradataml/data/lungcancer.csv +138 -0
- teradataml/data/mappingdata.csv +12 -0
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/milk_timeseries.csv +157 -0
- teradataml/data/min_max_titanic.csv +4 -0
- teradataml/data/minhash_example.json +6 -0
- teradataml/data/ml_ratings.csv +7547 -0
- teradataml/data/ml_ratings_10.csv +2445 -0
- teradataml/data/mobile_data.csv +13 -0
- teradataml/data/model1_table.csv +5 -0
- teradataml/data/model2_table.csv +5 -0
- teradataml/data/models/License_file.txt +1 -0
- teradataml/data/models/License_file_empty.txt +0 -0
- teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
- teradataml/data/models/dr_iris_rf +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
- teradataml/data/models/iris_db_glm_model.pmml +57 -0
- teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
- teradataml/data/models/iris_kmeans_model +0 -0
- teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
- teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
- teradataml/data/modularity_example.json +12 -0
- teradataml/data/movavg_example.json +8 -0
- teradataml/data/mtx1.csv +7 -0
- teradataml/data/mtx2.csv +13 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/mvdfft8.csv +9 -0
- teradataml/data/naivebayes_example.json +10 -0
- teradataml/data/naivebayespredict_example.json +19 -0
- teradataml/data/naivebayestextclassifier2_example.json +7 -0
- teradataml/data/naivebayestextclassifier_example.json +8 -0
- teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
- teradataml/data/name_Find_configure.csv +10 -0
- teradataml/data/namedentityfinder_example.json +14 -0
- teradataml/data/namedentityfinderevaluator_example.json +10 -0
- teradataml/data/namedentityfindertrainer_example.json +6 -0
- teradataml/data/nb_iris_input_test.csv +31 -0
- teradataml/data/nb_iris_input_train.csv +121 -0
- teradataml/data/nbp_iris_model.csv +13 -0
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_extractor_text.csv +2 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/ner_sports_test2.csv +29 -0
- teradataml/data/ner_sports_train.csv +501 -0
- teradataml/data/nerevaluator_example.json +6 -0
- teradataml/data/nerextractor_example.json +18 -0
- teradataml/data/nermem_sports_test.csv +18 -0
- teradataml/data/nermem_sports_train.csv +51 -0
- teradataml/data/nertrainer_example.json +7 -0
- teradataml/data/ngrams_example.json +7 -0
- teradataml/data/notebooks/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
- teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
- teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
- teradataml/data/npath_example.json +23 -0
- teradataml/data/ntree_example.json +14 -0
- teradataml/data/numeric_strings.csv +5 -0
- teradataml/data/numerics.csv +4 -0
- teradataml/data/ocean_buoy.csv +17 -0
- teradataml/data/ocean_buoy2.csv +17 -0
- teradataml/data/ocean_buoys.csv +28 -0
- teradataml/data/ocean_buoys2.csv +10 -0
- teradataml/data/ocean_buoys_nonpti.csv +28 -0
- teradataml/data/ocean_buoys_seq.csv +29 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +92 -0
- teradataml/data/optional_event_table.csv +4 -0
- teradataml/data/orders1.csv +11 -0
- teradataml/data/orders1_12.csv +13 -0
- teradataml/data/orders_ex.csv +4 -0
- teradataml/data/pack_example.json +9 -0
- teradataml/data/package_tracking.csv +19 -0
- teradataml/data/package_tracking_pti.csv +19 -0
- teradataml/data/pagerank_example.json +13 -0
- teradataml/data/paragraphs_input.csv +6 -0
- teradataml/data/pathanalyzer_example.json +8 -0
- teradataml/data/pathgenerator_example.json +8 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/phrases.csv +7 -0
- teradataml/data/pivot_example.json +9 -0
- teradataml/data/pivot_input.csv +22 -0
- teradataml/data/playerRating.csv +31 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/postagger_example.json +7 -0
- teradataml/data/posttagger_output.csv +44 -0
- teradataml/data/production_data.csv +17 -0
- teradataml/data/production_data2.csv +7 -0
- teradataml/data/randomsample_example.json +32 -0
- teradataml/data/randomwalksample_example.json +9 -0
- teradataml/data/rank_table.csv +6 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/ref_mobile_data.csv +4 -0
- teradataml/data/ref_mobile_data_dense.csv +2 -0
- teradataml/data/ref_url.csv +17 -0
- teradataml/data/restaurant_reviews.csv +7 -0
- teradataml/data/retail_churn_table.csv +27772 -0
- teradataml/data/river_data.csv +145 -0
- teradataml/data/roc_example.json +8 -0
- teradataml/data/roc_input.csv +101 -0
- teradataml/data/rule_inputs.csv +6 -0
- teradataml/data/rule_table.csv +2 -0
- teradataml/data/sales.csv +7 -0
- teradataml/data/sales_transaction.csv +501 -0
- teradataml/data/salesdata.csv +342 -0
- teradataml/data/sample_cities.csv +3 -0
- teradataml/data/sample_shapes.csv +11 -0
- teradataml/data/sample_streets.csv +3 -0
- teradataml/data/sampling_example.json +16 -0
- teradataml/data/sax_example.json +17 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +74 -0
- teradataml/data/scale_housing.csv +11 -0
- teradataml/data/scale_housing_test.csv +6 -0
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scale_stat.csv +11 -0
- teradataml/data/scalebypartition_example.json +13 -0
- teradataml/data/scalemap_example.json +13 -0
- teradataml/data/scalesummary_example.json +12 -0
- teradataml/data/score_category.csv +101 -0
- teradataml/data/score_summary.csv +4 -0
- teradataml/data/script_example.json +10 -0
- teradataml/data/scripts/deploy_script.py +84 -0
- teradataml/data/scripts/lightgbm/dataset.template +175 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/mapper.py +16 -0
- teradataml/data/scripts/mapper_replace.py +16 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/seeds.csv +10 -0
- teradataml/data/sentenceextractor_example.json +7 -0
- teradataml/data/sentiment_extract_input.csv +11 -0
- teradataml/data/sentiment_train.csv +16 -0
- teradataml/data/sentiment_word.csv +20 -0
- teradataml/data/sentiment_word_input.csv +20 -0
- teradataml/data/sentimentextractor_example.json +24 -0
- teradataml/data/sentimenttrainer_example.json +8 -0
- teradataml/data/sequence_table.csv +10 -0
- teradataml/data/seriessplitter_example.json +8 -0
- teradataml/data/sessionize_example.json +17 -0
- teradataml/data/sessionize_table.csv +116 -0
- teradataml/data/setop_test1.csv +24 -0
- teradataml/data/setop_test2.csv +22 -0
- teradataml/data/soc_nw_edges.csv +11 -0
- teradataml/data/soc_nw_vertices.csv +8 -0
- teradataml/data/souvenir_timeseries.csv +168 -0
- teradataml/data/sparse_iris_attribute.csv +5 -0
- teradataml/data/sparse_iris_test.csv +121 -0
- teradataml/data/sparse_iris_train.csv +601 -0
- teradataml/data/star1.csv +6 -0
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/state_transition.csv +5 -0
- teradataml/data/stock_data.csv +53 -0
- teradataml/data/stock_movement.csv +11 -0
- teradataml/data/stock_vol.csv +76 -0
- teradataml/data/stop_words.csv +8 -0
- teradataml/data/store_sales.csv +37 -0
- teradataml/data/stringsimilarity_example.json +8 -0
- teradataml/data/strsimilarity_input.csv +13 -0
- teradataml/data/students.csv +101 -0
- teradataml/data/svm_iris_input_test.csv +121 -0
- teradataml/data/svm_iris_input_train.csv +481 -0
- teradataml/data/svm_iris_model.csv +7 -0
- teradataml/data/svmdense_example.json +10 -0
- teradataml/data/svmdensepredict_example.json +19 -0
- teradataml/data/svmsparse_example.json +8 -0
- teradataml/data/svmsparsepredict_example.json +14 -0
- teradataml/data/svmsparsesummary_example.json +8 -0
- teradataml/data/target_mobile_data.csv +13 -0
- teradataml/data/target_mobile_data_dense.csv +5 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/templatedata.csv +1201 -0
- teradataml/data/templates/open_source_ml.json +11 -0
- teradataml/data/teradata_icon.ico +0 -0
- teradataml/data/teradataml_example.json +1473 -0
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_loan_prediction.csv +53 -0
- teradataml/data/test_pacf_12.csv +37 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/test_river2.csv +109 -0
- teradataml/data/text_inputs.csv +6 -0
- teradataml/data/textchunker_example.json +8 -0
- teradataml/data/textclassifier_example.json +7 -0
- teradataml/data/textclassifier_input.csv +7 -0
- teradataml/data/textclassifiertrainer_example.json +7 -0
- teradataml/data/textmorph_example.json +11 -0
- teradataml/data/textparser_example.json +15 -0
- teradataml/data/texttagger_example.json +12 -0
- teradataml/data/texttokenizer_example.json +7 -0
- teradataml/data/texttrainer_input.csv +11 -0
- teradataml/data/tf_example.json +7 -0
- teradataml/data/tfidf_example.json +14 -0
- teradataml/data/tfidf_input1.csv +201 -0
- teradataml/data/tfidf_train.csv +6 -0
- teradataml/data/time_table1.csv +535 -0
- teradataml/data/time_table2.csv +14 -0
- teradataml/data/timeseriesdata.csv +1601 -0
- teradataml/data/timeseriesdatasetsd4.csv +105 -0
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic.csv +892 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/token_table.csv +696 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/train_tracking.csv +28 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/transformation_table.csv +6 -0
- teradataml/data/transformation_table_new.csv +2 -0
- teradataml/data/tv_spots.csv +16 -0
- teradataml/data/twod_climate_data.csv +117 -0
- teradataml/data/uaf_example.json +529 -0
- teradataml/data/univariatestatistics_example.json +9 -0
- teradataml/data/unpack_example.json +10 -0
- teradataml/data/unpivot_example.json +25 -0
- teradataml/data/unpivot_input.csv +8 -0
- teradataml/data/url_data.csv +10 -0
- teradataml/data/us_air_pass.csv +37 -0
- teradataml/data/us_population.csv +624 -0
- teradataml/data/us_states_shapes.csv +52 -0
- teradataml/data/varmax_example.json +18 -0
- teradataml/data/vectordistance_example.json +30 -0
- teradataml/data/ville_climatedata.csv +121 -0
- teradataml/data/ville_tempdata.csv +12 -0
- teradataml/data/ville_tempdata1.csv +12 -0
- teradataml/data/ville_temperature.csv +11 -0
- teradataml/data/waveletTable.csv +1605 -0
- teradataml/data/waveletTable2.csv +1605 -0
- teradataml/data/weightedmovavg_example.json +9 -0
- teradataml/data/wft_testing.csv +5 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/data/word_embed_input_table1.csv +6 -0
- teradataml/data/word_embed_input_table2.csv +5 -0
- teradataml/data/word_embed_model.csv +23 -0
- teradataml/data/words_input.csv +13 -0
- teradataml/data/xconvolve_complex_left.csv +6 -0
- teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
- teradataml/data/xgboost_example.json +36 -0
- teradataml/data/xgboostpredict_example.json +32 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/__init__.py +0 -0
- teradataml/dataframe/copy_to.py +2446 -0
- teradataml/dataframe/data_transfer.py +2840 -0
- teradataml/dataframe/dataframe.py +20908 -0
- teradataml/dataframe/dataframe_utils.py +2114 -0
- teradataml/dataframe/fastload.py +794 -0
- teradataml/dataframe/functions.py +2110 -0
- teradataml/dataframe/indexer.py +424 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +1171 -0
- teradataml/dataframe/sql.py +10904 -0
- teradataml/dataframe/sql_function_parameters.py +440 -0
- teradataml/dataframe/sql_functions.py +652 -0
- teradataml/dataframe/sql_interfaces.py +220 -0
- teradataml/dataframe/vantage_function_types.py +675 -0
- teradataml/dataframe/window.py +694 -0
- teradataml/dbutils/__init__.py +3 -0
- teradataml/dbutils/dbutils.py +2871 -0
- teradataml/dbutils/filemgr.py +318 -0
- teradataml/gen_ai/__init__.py +2 -0
- teradataml/gen_ai/convAI.py +473 -0
- teradataml/geospatial/__init__.py +4 -0
- teradataml/geospatial/geodataframe.py +1105 -0
- teradataml/geospatial/geodataframecolumn.py +392 -0
- teradataml/geospatial/geometry_types.py +926 -0
- teradataml/hyperparameter_tuner/__init__.py +1 -0
- teradataml/hyperparameter_tuner/optimizer.py +4115 -0
- teradataml/hyperparameter_tuner/utils.py +303 -0
- teradataml/lib/__init__.py +0 -0
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/lib/libaed_0_1_ppc64le.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/_base.py +1321 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/_constants.py +61 -0
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +267 -0
- teradataml/options/__init__.py +148 -0
- teradataml/options/configure.py +489 -0
- teradataml/options/display.py +187 -0
- teradataml/plot/__init__.py +3 -0
- teradataml/plot/axis.py +1427 -0
- teradataml/plot/constants.py +15 -0
- teradataml/plot/figure.py +431 -0
- teradataml/plot/plot.py +810 -0
- teradataml/plot/query_generator.py +83 -0
- teradataml/plot/subplot.py +216 -0
- teradataml/scriptmgmt/UserEnv.py +4273 -0
- teradataml/scriptmgmt/__init__.py +3 -0
- teradataml/scriptmgmt/lls_utils.py +2157 -0
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +900 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +409 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/series/__init__.py +0 -0
- teradataml/series/series.py +537 -0
- teradataml/series/series_utils.py +71 -0
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +658 -0
- teradataml/store/feature_store/feature_store.py +4814 -0
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +7330 -0
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/Apply.py +979 -0
- teradataml/table_operators/Script.py +1739 -0
- teradataml/table_operators/TableOperator.py +1343 -0
- teradataml/table_operators/__init__.py +2 -0
- teradataml/table_operators/apply_query_generator.py +262 -0
- teradataml/table_operators/query_generator.py +493 -0
- teradataml/table_operators/table_operator_query_generator.py +462 -0
- teradataml/table_operators/table_operator_util.py +726 -0
- teradataml/table_operators/templates/dataframe_apply.template +184 -0
- teradataml/table_operators/templates/dataframe_map.template +176 -0
- teradataml/table_operators/templates/dataframe_register.template +73 -0
- teradataml/table_operators/templates/dataframe_udf.template +67 -0
- teradataml/table_operators/templates/script_executor.template +170 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +53 -0
- teradataml/utils/__init__.py +0 -0
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +943 -0
- teradataml/utils/internal_buffer.py +122 -0
- teradataml/utils/print_versions.py +206 -0
- teradataml/utils/utils.py +451 -0
- teradataml/utils/validators.py +3305 -0
- teradataml-20.0.0.8.dist-info/METADATA +2804 -0
- teradataml-20.0.0.8.dist-info/RECORD +1208 -0
- teradataml-20.0.0.8.dist-info/WHEEL +5 -0
- teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
- teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,2446 @@
+#!/usr/bin/python
+# ##################################################################
+#
+# Copyright 2018 Teradata. All rights reserved.
+# TERADATA CONFIDENTIAL AND TRADE SECRET
+#
+# ##################################################################
+
+import re
+import datetime
+import warnings
+import pandas as pd
+import pandas.api.types as pt
+
+from sqlalchemy import MetaData, Table, Column
+from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
+from teradataml.dataframe.sql import ColumnExpression
+from teradatasqlalchemy import (INTEGER, BIGINT, BYTEINT, FLOAT)
+from teradatasqlalchemy import (TIMESTAMP, DATE)
+from teradatasqlalchemy import (VARCHAR)
+from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIMESTAMP)
+from teradatasqlalchemy.dialect import TDCreateTablePost as post
+from teradataml.common.aed_utils import AedUtils
+from teradataml.context.context import *
+from teradataml.dataframe import dataframe as tdmldf
+from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
+from teradataml.dbutils.dbutils import _rename_table
+from teradataml.common.utils import UtilFuncs
+from teradataml.options.configure import configure
+from teradataml.common.constants import CopyToConstants, PTITableConstants, TeradataTypes
+from teradatasql import OperationalError
+from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
+from teradataml.utils.utils import execute_sql
+from teradataml.utils.validators import _Validators
+from teradataml.telemetry_utils.queryband import collect_queryband
+from teradatasqlalchemy.dialect import dialect as td_dialect
+
+from teradataml.utils.dtypes import _TupleOf
+
+@collect_queryband(queryband="CpToSql")
+def copy_to_sql(df, table_name,
+                schema_name=None, if_exists='append',
+                index=False, index_label=None,
+                primary_index=None,
+                temporary=False, types=None,
+                primary_time_index_name=None,
+                timecode_column=None,
+                timebucket_duration=None,
+                timezero_date=None,
+                columns_list=None,
+                sequence_column=None,
+                seq_max=None,
+                set_table=False,
+                chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
+                match_column_order=True,
+                partition_by=None,
+                partition_by_case=None,
+                partition_by_range=None,
+                sub_partition=None,
+                **kwargs):
+    """
+    Writes records stored in a Pandas DataFrame or a teradataml DataFrame to Teradata Vantage.
+
+    PARAMETERS:
+
+        df:
+            Required Argument.
+            Specifies the Pandas or teradataml DataFrame object to be saved.
+            Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
+
+        table_name:
+            Required Argument.
+            Specifies the name of the table to be created in Vantage.
+            Types: String
+
+        schema_name:
+            Optional Argument.
+            Specifies the name of the SQL schema in Teradata Vantage to write to.
+            Types: String
+            Default: None (Uses default database schema).
+
+            Note: schema_name will be ignored when temporary=True.
+
+        if_exists:
+            Optional Argument.
+            Specifies the action to take when the table already exists in Vantage.
+            Types: String
+            Possible values: {'fail', 'replace', 'append'}
+                - fail: If table exists, raise TeradataMlException.
+                - replace: If table exists, drop it, recreate it, and insert data.
+                - append: If table exists, insert data. Create the table if it does not exist.
+            Default: append
+
+            Note: Replacing a table with the contents of a teradataml DataFrame based on
+                  the same underlying table is not supported.
+
+        index:
+            Optional Argument.
+            Specifies whether to save the Pandas DataFrame index as a column or not.
+            Types: Boolean (True or False)
+            Default: False
+
+            Note: Only use as True when attempting to save Pandas DataFrames (and not with teradataml DataFrames).
+
+        index_label:
+            Optional Argument.
+            Specifies the column label(s) for the Pandas DataFrame index column(s).
+            Types: String or list of strings
+            Default: None
+
+            Note: If index_label is not specified (defaulted to None or is empty) and `index` is True, then
+                  the 'names' property of the DataFrame's index is used as the label(s),
+                  and if that too is None or empty, then:
+                  1) a default label 'index_label' or 'level_0' (when 'index_label' is already taken) is used
+                     when the index is standard.
+                  2) default labels 'level_0', 'level_1', etc. are used when the index is a multi-level index.
+
+                  Only use as True when attempting to save Pandas DataFrames (and not on teradataml DataFrames).
+
+        primary_index:
+            Optional Argument.
+            Specifies which column(s) to use as primary index while creating Teradata table(s) in Vantage.
+            When None, No Primary Index Teradata tables are created.
+            Types: String or list of strings
+            Default: None
+            Example:
+                primary_index = 'my_primary_index'
+                primary_index = ['my_primary_index1', 'my_primary_index2', 'my_primary_index3']
+
+        temporary:
+            Optional Argument.
+            Specifies whether to create Vantage tables as permanent or volatile.
+            Types: Boolean (True or False)
+            Default: False
+
+            Note: When True:
+                  1. volatile tables are created, and
+                  2. schema_name is ignored.
+                  When False, permanent tables are created.
+
+        types:
+            Optional Argument.
+            Specifies required data types for requested columns to be saved in Teradata Vantage.
+            Types: Python dictionary ({column_name1: type_value1, ... column_nameN: type_valueN})
+            Default: None
+
+            Note:
+                1. This argument accepts a dictionary of column names and their required teradatasqlalchemy types
+                   as key-value pairs, allowing a subset of the columns to be given a specific type.
+                   i) When the input is a Pandas DataFrame:
+                      - When only a subset of all columns are provided, the column types for the rest are assigned
+                        appropriately.
+                      - When the types argument is not provided, the column types are assigned
+                        as listed in the following table:
+                        +---------------------------+-----------------------------------------+
+                        |     Pandas/Numpy Type     |         teradatasqlalchemy Type         |
+                        +---------------------------+-----------------------------------------+
+                        | int32                     | INTEGER                                 |
+                        +---------------------------+-----------------------------------------+
+                        | int64                     | BIGINT                                  |
+                        +---------------------------+-----------------------------------------+
+                        | bool                      | BYTEINT                                 |
+                        +---------------------------+-----------------------------------------+
+                        | float32/float64           | FLOAT                                   |
+                        +---------------------------+-----------------------------------------+
+                        | datetime64/datetime64[ns] | TIMESTAMP                               |
+                        +---------------------------+-----------------------------------------+
+                        | datetime64[ns,<time_zone>]| TIMESTAMP(timezone=True)                |
+                        +---------------------------+-----------------------------------------+
+                        | Any other data type       | VARCHAR(configure.default_varchar_size) |
+                        +---------------------------+-----------------------------------------+
+                   ii) When the input is a teradataml DataFrame:
+                       - When only a subset of all columns are provided, the column types for the rest are retained.
+                       - When the types argument is not provided, the column types are retained.
+                2. This argument does not have any effect when the table specified using table_name and schema_name
+                   exists and if_exists = 'append'.
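
The mapping above means a caller usually only overrides the handful of columns that matter; the rest fall back to the defaults in the table. A minimal sketch, assuming an active teradataml connection (`pdf` and the column names are hypothetical):

```python
# Sketch only: 'name' would default to VARCHAR(configure.default_varchar_size)
# and 'clicks' to BIGINT per the mapping table; the dict overrides just 'name'.
import pandas as pd
from teradatasqlalchemy import VARCHAR
from teradataml.dataframe.copy_to import copy_to_sql

pdf = pd.DataFrame({"name": ["a", "b"],        # object  -> VARCHAR by default
                    "clicks": [10, 20],        # int64   -> BIGINT
                    "score": [0.5, 0.9]})      # float64 -> FLOAT
copy_to_sql(df=pdf, table_name="clicks_t", types={"name": VARCHAR(64)})
```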
+
+        primary_time_index_name:
+            Optional Argument.
+            Specifies a name for the Primary Time Index (PTI) when the table
+            to be created must be a PTI table.
+            Types: String
+
+            Note: This argument is not required or used when the table to be created
+                  is not a PTI table. It will be ignored if specified without the timecode_column.
+
+        timecode_column:
+            Optional Argument.
+            Required when the DataFrame must be saved as a PTI table.
+            Specifies the column in the DataFrame that reflects the form
+            of the timestamp data in the time series.
+            This column will be the TD_TIMECODE column in the table created.
+            It should be of SQL type TIMESTAMP(n), TIMESTAMP(n) WITH TIMEZONE, or DATE,
+            corresponding to Python types datetime.datetime or datetime.date, or Pandas dtype datetime64[ns].
+            Types: String
+
+            Note: When you specify this parameter, an attempt to create a PTI table
+                  will be made. This argument is not required when the table to be created
+                  is not a PTI table. If this argument is specified, primary_index will be ignored.
+
+        timezero_date:
+            Optional Argument.
+            Used when the DataFrame must be saved as a PTI table.
+            Specifies the earliest time series data that the PTI table will accept;
+            a date that precedes the earliest date in the time series data.
+            Value specified must be of the following format: DATE 'YYYY-MM-DD'
+            Default Value: DATE '1970-01-01'.
+            Types: String
+
+            Note: This argument is not required or used when the table to be created
+                  is not a PTI table. It will be ignored if specified without the timecode_column.
+
+        timebucket_duration:
+            Optional Argument.
+            Required if columns_list is not specified or is None.
+            Used when the DataFrame must be saved as a PTI table.
+            Specifies a duration that serves to break up the time continuum in
+            the time series data into discrete groups or buckets.
+            Specified using the formal form time_unit(n), where n is a positive
+            integer, and time_unit can be any of the following:
+            CAL_YEARS, CAL_MONTHS, CAL_DAYS, WEEKS, DAYS, HOURS, MINUTES,
+            SECONDS, MILLISECONDS, or MICROSECONDS.
+            Types: String
+
+            Note: This argument is not required or used when the table to be created
+                  is not a PTI table. It will be ignored if specified without the timecode_column.
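
For instance, a duration of `MINUTES(30)` groups readings into half-hour buckets. A minimal sketch of a PTI save using this argument; the DataFrame and column names (`readings`, `ts`) are hypothetical and assume an active teradataml connection:

```python
# Sketch only: 'readings' is a hypothetical DataFrame with a timestamp
# column 'ts' (datetime64[ns]) plus measurement columns.
copy_to_sql(df=readings, table_name="sensor_pti",
            timecode_column="ts",                 # becomes TD_TIMECODE
            timebucket_duration="MINUTES(30)",    # 30-minute buckets
            timezero_date="DATE '2024-01-01'")    # earliest accepted date
```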
+
+        columns_list:
+            Optional Argument.
+            Used when the DataFrame must be saved as a PTI table.
+            Required if timebucket_duration is not specified.
+            A list of one or more PTI table column names.
+            Types: String or list of Strings
+
+            Note: This argument is not required or used when the table to be created
+                  is not a PTI table. It will be ignored if specified without the timecode_column.
+
+        sequence_column:
+            Optional Argument.
+            Used when the DataFrame must be saved as a PTI table.
+            Specifies the column of type Integer containing the unique identifier for
+            time series data readings when they are not unique in time.
+            * When specified, implies SEQUENCED, meaning more than one reading from the same
+              sensor may have the same timestamp.
+              This column will be the TD_SEQNO column in the table created.
+            * When not specified, implies NONSEQUENCED, meaning there is only one sensor reading
+              per timestamp.
+              This is the default.
+            Types: str
+
+            Note: This argument is not required or used when the table to be created
+                  is not a PTI table. It will be ignored if specified without the timecode_column.
+
+        seq_max:
+            Optional Argument.
+            Used when the DataFrame must be saved as a PTI table.
+            Specifies the maximum number of sensor data rows that can have the
+            same timestamp. Applicable when sequence_column is specified.
+            Accepted range: 1 - 2147483647.
+            Default Value: 20000.
+            Types: int
+
+            Note: This argument is not required or used when the table to be created
+                  is not a PTI table. It will be ignored if specified without the timecode_column.
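
A SEQUENCED PTI save therefore pairs timecode_column with sequence_column (and optionally seq_max). A minimal sketch, again with hypothetical names and an assumed active connection:

```python
# Sketch only: several readings may share a timestamp, so 'reading_no'
# disambiguates them and becomes TD_SEQNO in the created table.
copy_to_sql(df=readings, table_name="sensor_pti_seq",
            timecode_column="ts",
            columns_list=["sensor_id"],
            sequence_column="reading_no",
            seq_max=1000)                 # at most 1000 rows per timestamp
```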
+
+        set_table:
+            Optional Argument.
+            Specifies a flag to determine whether to create a SET or a MULTISET table.
+            When True, a SET table is created.
+            When False, a MULTISET table is created.
+            Default Value: False
+            Types: boolean
+
+            Note: 1. Specifying set_table=True also requires specifying primary_index or timecode_column.
+                  2. Creating a SET table (set_table=True) may result in
+                     a. an error if the source is a Pandas DataFrame having duplicate rows.
+                     b. loss of duplicate rows if the source is a teradataml DataFrame.
+                  3. This argument has no effect if the table already exists and if_exists='append'.
+
+        chunksize:
+            Optional Argument.
+            Specifies the number of rows to be loaded in a batch.
+            Note:
+                This argument is used only when argument "df" is a pandas DataFrame.
+            Default Value: 16383
+            Types: int
+
+        match_column_order:
+            Optional Argument.
+            Specifies whether the order of the columns in the existing table matches the order of
+            the columns in "df" or not. When set to False, the dataframe to be loaded can
+            have any order and number of columns.
+            Default Value: True
+            Types: bool
+
+        partition_by:
+            Optional Argument.
+            Specifies the columns on which the partition should be created while creating the table.
+            Note:
+                1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
+                2. "primary_index" should be specified when "partition_by" is used.
+                3. Not applicable for PTI tables.
+            Types: str or ColumnExpression
+
+        partition_by_case:
+            Optional Argument.
+            Specifies different cases to partition the index while creating the table.
+            Note:
+                1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
+                2. "primary_index" should be specified when "partition_by_case" is used.
+                3. Not applicable for PTI tables.
+            Types: str or ColumnExpression or tuple of ColumnExpression, str
+
+        partition_by_range:
+            Optional Argument.
+            Specifies the range of values on which the partition should be created while creating a table.
+            Note:
+                1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
+                2. "primary_index" should be specified when "partition_by_range" is used.
+                3. Not applicable for PTI tables.
+            Types: str or ColumnExpression
+
+        sub_partition:
+            Optional Argument.
+            Specifies the details to subpartition the main partition according to the value provided while creating the table.
+            Note:
+                1. "sub_partition" is applicable only when "partition_by_range" is specified.
+                2. Not applicable for PTI tables.
+            Types: int or Teradata Interval datatypes
+
+        **kwargs:
+            Optional keyword arguments.
+
+            valid_time_columns:
+                Optional Argument.
+                Specifies the name(s) of the valid time columns to be referred to in "df".
+                When "valid_time_columns" is specified, the function considers
+                these columns as valid time dimension columns and creates a
+                valid time dimension temporal table if the table does not exist.
+                Notes:
+                    * If a string is provided, the column must be of PERIOD type.
+                Types: tuple of strings or str
+
+            derived_column:
+                Optional Argument.
+                Specifies the name of the derived column to be kept in the temporal table.
+                Notes:
+                    * Argument is ignored if "valid_time_columns" are not specified.
+                    * Argument is considered only if copy_to_sql() is creating a table.
+                    * If "valid_time_columns" is specified and "derived_column" is not specified,
+                      then copy_to_sql() automatically creates a derived column by adding "_" between
+                      the columns mentioned in "valid_time_columns". For example,
+                      if "valid_time_columns" is ('col1', 'col2') and "derived_column"
+                      is not specified, then copy_to_sql() creates the table with
+                      the derived column name as 'col1_col2'.
+                Types: str
+
+    RETURNS:
+        None
+
+    RAISES:
+        TeradataMlException
+
+    EXAMPLES:
+        1. Saving a Pandas DataFrame:
+
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> from teradatasqlalchemy.types import *
+
+        >>> df = {'emp_name': ['A1', 'A2', 'A3', 'A4'],
+        ...       'emp_sage': [100, 200, 300, 400],
+        ...       'emp_id': [133, 144, 155, 177],
+        ...       'marks': [99.99, 97.32, 94.67, 91.00]
+        ...       }
+
+        >>> pandas_df = pd.DataFrame(df)
+
+        a) Save a Pandas DataFrame using a dataframe & table name only:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table')
+
+        b) Saving as a SET table:
+        >>> copy_to_sql(df=pandas_df, table_name='my_set_table', index=True,
+        ...             primary_index='index_label', set_table=True)
+
+        c) Save a Pandas DataFrame by specifying additional parameters:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_2', schema_name='alice',
+        ...             index=True, index_label='my_index_label', temporary=False,
+        ...             primary_index=['emp_id'], if_exists='append',
+        ...             types={'emp_name': VARCHAR, 'emp_sage':INTEGER,
+        ...                    'emp_id': BIGINT, 'marks': DECIMAL})
+
+        d) Saving with additional parameters as a SET table:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_3', schema_name='alice',
+        ...             index=True, index_label='my_index_label', temporary=False,
+        ...             primary_index=['emp_id'], if_exists='append',
+        ...             types={'emp_name': VARCHAR, 'emp_sage':INTEGER,
+        ...                    'emp_id': BIGINT, 'marks': DECIMAL},
+        ...             set_table=True)
+
+        e) Saving levels in an index of type MultiIndex:
+        >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_4', schema_name='alice',
+        ...             index=True, index_label=['index1', 'index2'], temporary=False,
+        ...             primary_index=['index1'], if_exists='replace')
+
+        f) Save a Pandas DataFrame with VECTOR datatype:
+        >>> import pandas as pd
+        >>> VECTOR_data = {
+        ...     'id': [10, 11, 12, 13],
+        ...     'array_col': ['1,1', '2,2', '3,3', '4,4']
+        ...     }
+        >>> df = pd.DataFrame(VECTOR_data)
+
+        >>> from teradatasqlalchemy import VECTOR
+        >>> copy_to_sql(df=df, table_name='my_vector_table', types={'array_col': VECTOR})
+
+        g) Saving a pandas DataFrame with partition_by:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_5', if_exists='replace',
+        ...             primary_index=['emp_id'],
+        ...             partition_by='emp_id')
+
+        h) Saving a pandas DataFrame with partition_by_case:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_6', if_exists='replace',
+        ...             primary_index=['emp_id'],
+        ...             partition_by_case='emp_id > 100, emp_id < 500')
+
+        i) Saving a pandas DataFrame with partition_by_range:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_7', if_exists='replace',
+        ...             primary_index=['emp_id'],
+        ...             partition_by_range='emp_id BETWEEN 100 AND 500')
+
+        j) Save a Pandas DataFrame with valid time columns of DATE type to a temporal table.
+        >>> import pandas as pd
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> df = pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_date': pd.to_datetime(['2024-01-01', '2024-02-01', '2024-03-01']).date,
+        ...     'end_date': pd.to_datetime(['2024-01-10', '2024-02-10', '2024-03-10']).date,
+        ...     'description': ['a', 'b', 'c']
+        ...     })
+        >>> copy_to_sql(
+        ...     df=df,
+        ...     table_name='temporal_table_pandas_date',
+        ...     valid_time_columns=('start_date', 'end_date')
+        ...     )
+
+        k) Save a Pandas DataFrame with valid time columns of TIMESTAMP type
+           to a temporal table. Name the derived column as `valid_time`.
+        >>> import pandas as pd
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> df = pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_time': pd.to_datetime(['2024-01-01 10:00:00', '2024-02-01 11:00:00', '2024-03-01 12:00:00']),
+        ...     'end_time': pd.to_datetime(['2024-01-01 12:00:00', '2024-02-01 13:00:00', '2024-03-01 14:00:00']),
+        ...     'description': ['a', 'b', 'c']
+        ...     })
+        >>> copy_to_sql(
+        ...     df=df,
+        ...     table_name='temporal_table_pandas_timestamp',
+        ...     valid_time_columns=('start_time', 'end_time'),
+        ...     derived_column='valid_time'
+        ...     )
+
+        l) Save a teradataml DataFrame with a valid time column of PERIOD type to a temporal table.
+        >>> from teradataml.dataframe.dataframe import DataFrame
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> from teradataml.data.load_example_data import load_example_data
+        >>> load_example_data("teradataml", "Employee_roles")
+        >>> from teradatasqlalchemy.types import PERIOD_DATE
+        >>> df = DataFrame('Employee_roles')
+        >>> copy_to_sql(
+        ...     df,
+        ...     table_name='employee_roles_temporal',
+        ...     valid_time_columns='role_validity_period',
+        ...     types={'role_validity_period': PERIOD_DATE}
+        ...     )
+
+        2. Saving a teradataml DataFrame:
+
+        >>> from teradataml.dataframe.dataframe import DataFrame
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> from teradatasqlalchemy.types import *
+        >>> from teradataml.data.load_example_data import load_example_data
+
+        >>> # Load the data to run the example.
+        >>> load_example_data("glm", "admissions_train")
+
+        >>> # Create teradataml DataFrame(s).
+        >>> df = DataFrame('admissions_train')
+        >>> df2 = df.select(['gpa', 'masters'])
+
+        a) Save a teradataml DataFrame by using only a table name:
+        >>> df2.to_sql('my_tdml_table')
+
+        b) Save a teradataml DataFrame by using additional parameters:
+        >>> df2.to_sql(table_name='my_tdml_table', if_exists='append',
+        ...            primary_index=['gpa'], temporary=False, schema_name='alice')
+
+        c) Alternatively, save a teradataml DataFrame by using copy_to_sql:
+        >>> copy_to_sql(df2, 'my_tdml_table_2')
+
+        d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
+        >>> copy_to_sql(df=df2, table_name='my_tdml_table_3', schema_name='alice',
+        ...             temporary=False, primary_index=None, if_exists='append',
+        ...             types={'masters': VARCHAR, 'gpa':INTEGER})
+
+        e) Saving as a SET table:
+        >>> copy_to_sql(df=df2, table_name='my_tdml_set_table', schema_name='alice',
+        ...             temporary=False, primary_index=['gpa'], if_exists='append',
+        ...             types={'masters': VARCHAR, 'gpa':INTEGER}, set_table=True)
+
+        f) Saving a teradataml DataFrame into a table by partitioning the table with column 'gpa':
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_4', if_exists='replace',
+        ...             primary_index=['gpa'],
+        ...             partition_by=df.gpa)
+
+        g) Saving a teradataml DataFrame into a table with two partitions as below:
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_5', if_exists='replace',
+        ...             primary_index=['id'],
+        ...             partition_by_case=(df.id < 100, df.gpa < 5.0))
+
+        h) Saving a teradataml DataFrame into a table by partitioning the table with different ranges:
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_6', if_exists='replace',
+        ...             primary_index=['id'],
+        ...             partition_by_range=df.id.between(1, 100))
+
+        i) Saving a teradataml DataFrame into a table by partitioning the table with different ranges.
+           Also sub-partitioning based on INTERVAL:
+        >>> load_example_data("dataframe", "sales")
+        >>> df = DataFrame('sales')
+        >>> from teradatasqlalchemy import INTERVAL_DAY
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_7', if_exists='replace',
+        ...             primary_index="Feb",
+        ...             partition_by_range=df.datetime.between('2017-01-01', '2017-01-31'),
+        ...             sub_partition=INTERVAL_DAY(1))
+
+        j) Save a teradataml DataFrame with valid time columns of DATE type to a temporal table.
+        >>> pdf = pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_date': pd.to_datetime(['2024-01-01', '2024-02-01', '2024-03-01']).date,
+        ...     'end_date': pd.to_datetime(['2024-01-10', '2024-02-10', '2024-03-10']).date,
+        ...     'description': ['a', 'b', 'c']
+        ...     })
+        >>> df_temporal = DataFrame(data=pdf)
+        >>> copy_to_sql(df=df_temporal, table_name='temporal_table_tdml_date',
+        ...             valid_time_columns=('start_date', 'end_date'))
+
+        k) Save a teradataml DataFrame with valid time columns of TIMESTAMP type
+           to a temporal table. Name the derived column as `validity_period`.
+        >>> df_temporal_ts = DataFrame(data=pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_time': pd.to_datetime(['2024-01-01 10:00:00', '2024-02-01 11:00:00', '2024-03-01 12:00:00']),
+        ...     'end_time': pd.to_datetime(['2024-01-01 12:00:00', '2024-02-01 13:00:00', '2024-03-01 14:00:00']),
+        ...     'description': ['a', 'b', 'c']
+        ...     }))
+        >>> copy_to_sql(df=df_temporal_ts, table_name='temporal_table_tdml_timestamp',
+        ...             valid_time_columns=('start_time', 'end_time'), derived_column='validity_period')
+
+        3. Saving a teradataml DataFrame as a PTI table:
+
+        >>> from teradataml.dataframe.dataframe import DataFrame
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> from teradataml.data.load_example_data import load_example_data
+
+        >>> load_example_data("sessionize", "sessionize_table")
+        >>> df3 = DataFrame('sessionize_table')
+
+        a) Using copy_to_sql:
+        >>> copy_to_sql(df3, "test_copyto_pti",
+        ...             timecode_column='clicktime',
+        ...             columns_list='event')
+
+        b) Alternatively, using DataFrame.to_sql:
+        >>> df3.to_sql(table_name="test_copyto_pti_1",
+        ...            timecode_column='clicktime',
+        ...            columns_list='event')
+
+        c) Saving as a SET table:
+        >>> copy_to_sql(df3, "test_copyto_pti_2",
+        ...             timecode_column='clicktime',
+        ...             columns_list='event',
+        ...             set_table=True)
+    """
+    # Accept valid_time_columns and derived_column from kwargs.
+    valid_time_columns = kwargs.get("valid_time_columns", None)
+    derived_column = kwargs.get("derived_column", None)
+
+    # Derive the global connection using get_connection().
+    con = get_connection()
+
+    try:
+        if con is None:
+            raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE), MessageCodes.CONNECTION_FAILURE)
+
+        # Check if the table to be created must be a Primary Time Index (PTI) table.
+        # If a user specifies the timecode_column parameter, an attempt to create
+        # a PTI table will be made.
+        is_pti = False
+        if timecode_column is not None:
+            is_pti = True
+            if primary_index is not None:
+                warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
+                                                   'primary_index',
+                                                   'timecode_column',
+                                                   'specified'), stacklevel=2)
+        else:
+            ignored = []
+            if timezero_date is not None: ignored.append('timezero_date')
+            if timebucket_duration is not None: ignored.append('timebucket_duration')
+            if sequence_column is not None: ignored.append('sequence_column')
+            if seq_max is not None: ignored.append('seq_max')
+            if columns_list is not None and (
+                    not isinstance(columns_list, list) or len(columns_list) > 0): ignored.append('columns_list')
+            if primary_time_index_name is not None: ignored.append('primary_time_index_name')
+            if len(ignored) > 0:
+                warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
+                                                   ignored,
+                                                   'timecode_column',
+                                                   'missing'), stacklevel=2)
+
+        # Unset schema_name when temporary is True since volatile tables are always in the user database.
+        if temporary is True:
+            if schema_name is not None:
+                warnings.warn(Messages.get_message(MessageCodes.IGNORE_ARGS_WARN,
+                                                   'schema_name',
+                                                   'temporary=True',
+                                                   'specified'), stacklevel=2)
+                schema_name = None
+
+        # Validate the DataFrame & related flags; proceed only when True.
+        from teradataml.dataframe.data_transfer import _DataTransferUtils
+        dt_obj = _DataTransferUtils(df=df, table_name=table_name, schema_name=schema_name,
+                                    if_exists=if_exists, index=index, index_label=index_label,
+                                    primary_index=primary_index, temporary=temporary,
+                                    types=types, primary_time_index_name=primary_time_index_name,
+                                    timecode_column=timecode_column,
+                                    timebucket_duration=timebucket_duration,
+                                    timezero_date=timezero_date, columns_list=columns_list,
+                                    sequence_column=sequence_column, seq_max=seq_max,
+                                    set_table=set_table, api_name='copy_to',
+                                    chunksize=chunksize, match_column_order=match_column_order)
+
+        dt_obj._validate()
+
+        # Validate the partition arguments.
+        _validate_partition_arguments(partition_by=partition_by,
+                                      partition_by_case=partition_by_case,
+                                      partition_by_range=partition_by_range,
+                                      sub_partition=sub_partition)
+
+        # If the table to be created must be a PTI table, then validate additional parameters.
+        # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
+        # will be ignored - for example, primary_index.
+        if is_pti:
+            _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
+                                          timezero_date, primary_time_index_name, columns_list,
+                                          sequence_column, seq_max, types, index, index_label)
+
+        # A table cannot be a SET table and have NO PRIMARY INDEX.
+        if set_table and primary_index is None and timecode_column is None:
+            raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_NO_PI),
+                                      MessageCodes.SET_TABLE_NO_PI)
+
+        # Check whether valid time columns are passed; if so, treat this as a temporal table.
+        is_temporal = False
+        if valid_time_columns is not None:
+            _validate_valid_time_columns(df, valid_time_columns, derived_column, types)
+            is_temporal = True
+
+        # Check if the destination table exists.
+        table_exists = dt_obj._table_exists(con)
+
+        # Raise an exception when the table exists and if_exists = 'fail'.
+        dt_obj._check_table_exists(is_table_exists=table_exists)
+
+        # Is the input DataFrame a Pandas DataFrame?
+        is_pandas_df = isinstance(df, pd.DataFrame)
+
+        # Let's also execute the node and set the table_name when df is a teradataml DataFrame.
+        if not is_pandas_df and df._table_name is None:
+            df._table_name = df_utils._execute_node_return_db_object_name(df._nodeid, df._metaexpr)
+
+        # Check whether a table name conflict is present.
+        is_conflict = _check_table_name_conflict(df, table_name) if isinstance(df, tdmldf.DataFrame) and \
+            if_exists.lower() == 'replace' else False
+
+        # Create a temporary table name when a table name conflict is present.
+        if is_conflict:
+            # Store the actual destination table name for later use.
+            dest_table_name = table_name
+            table_name = UtilFuncs._generate_temp_table_name(prefix=table_name,
+                                                             table_type=TeradataConstants.TERADATA_TABLE,
+                                                             quote=False)
+
+            # If configure.temp_object_type="VT", _generate_temp_table_name() returns the
+            # table name in fully qualified format. Because of this, test cases started
+            # failing with "Blank name in quotation mark". Hence, extract only the table name.
+            table_name = UtilFuncs._extract_table_name(table_name)
+
+        partition_exp, partition_func = _build_partition_expression(partition_by=partition_by,
+                                                                    partition_by_case=partition_by_case,
+                                                                    partition_by_range=partition_by_range,
+                                                                    sub_partition=sub_partition)
+
+        # Let's create the SQLAlchemy table object to recreate the table.
+        if not table_exists or if_exists.lower() == 'replace':
+            if is_temporal:
+                _create_temporal_table(df, table_name, con, primary_index,
+                                       schema_name, valid_time_columns, derived_column,
+                                       types, None if not is_pandas_df else index,
+                                       None if not is_pandas_df else index_label)
+            else:
+                if is_pti:
+                    table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
+                                                     primary_time_index_name, timecode_column, timezero_date,
+                                                     timebucket_duration, sequence_column, seq_max,
+                                                     columns_list, set_table, types,
+                                                     None if not is_pandas_df else index,
+                                                     None if not is_pandas_df else index_label)
+                else:
+                    table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
+                                                 types, None if not is_pandas_df else index,
+                                                 None if not is_pandas_df else index_label,
+                                                 partition_expression=partition_exp,
+                                                 partition_function=partition_func
+                                                 )
+
+                if table is not None:
+                    # If the table needs to be replaced and there is no table name conflict,
+                    # let's drop the existing table first.
+                    if table_exists and not is_conflict:
+                        tbl_name = dt_obj._get_fully_qualified_table_name()
+                        UtilFuncs._drop_table(tbl_name)
+                    try:
+                        table.create(bind=get_context())
+                    except sqlachemyOperationalError as err:
+                        raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
+                                                  '\n' + str(err),
+                                                  MessageCodes.TABLE_OBJECT_CREATION_FAILED)
+                else:
+                    raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
+                                              MessageCodes.TABLE_OBJECT_CREATION_FAILED)
+
+        # Check column compatibility for insertion when the table exists and if_exists = 'append'.
+        if table_exists and if_exists.lower() == 'append':
+            UtilFuncs._get_warnings('set_table', set_table, 'if_exists', 'append')
+
+            table = UtilFuncs._get_sqlalchemy_table(table_name,
+                                                    schema_name=schema_name)
+
+            if table is not None:
+                # ELE-2284
+                # We are not considering types for 'append' mode as it is a simple insert and no casting is applied.
+                if is_pandas_df:
+                    cols = _extract_column_info(df, index=index, index_label=index_label)
+                else:
+                    cols, _ = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
+                if match_column_order:
+                    cols_compatible = _check_columns_insertion_compatible(table.c, cols, is_pandas_df,
+                                                                          is_pti, timecode_column, sequence_column, derived_column)
+
+                    if not cols_compatible:
+                        raise TeradataMlException(Messages.get_message(MessageCodes.INSERTION_INCOMPATIBLE),
+                                                  MessageCodes.INSERTION_INCOMPATIBLE)
+
+        # df is a Pandas DataFrame object.
+        if isinstance(df, pd.DataFrame):
+            if not table_exists or if_exists.lower() == 'replace':
+                try:
+                    # Saving the Pandas index/volatile tables is supported by manually inserting rows in batches for now.
+                    if index or is_pti:
+                        _insert_from_dataframe(df, con, schema_name, table_name, index,
+                                               chunksize, is_pti, timecode_column,
+                                               sequence_column, match_column_order)
+
+                    # When the index isn't saved & for non-PTI tables, to_sql insertion is used (batch).
+                    else:
+                        # Empty the queryband buffer before the SQL call.
+                        UtilFuncs._set_queryband()
+                        df.to_sql(table_name, get_context(), if_exists='append', index=False, index_label=None,
+                                  chunksize=chunksize, schema=schema_name)
+
+                except sqlachemyOperationalError as err:
+                    if "Duplicate row error" in str(err):
+                        raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_DUPICATE_ROW,
+                                                                       table_name),
+                                                  MessageCodes.SET_TABLE_DUPICATE_ROW)
+                    else:
+                        raise
+
+            elif table_exists and if_exists.lower() == 'append':
+                _insert_from_dataframe(df, con, schema_name, table_name, index,
+                                       chunksize, is_pti, timecode_column,
+                                       sequence_column, match_column_order)
+
+        # df is a teradataml DataFrame object (to_sql wrapper used).
+        elif isinstance(df, tdmldf.DataFrame):
+            df_column_list = [col.name for col in df._metaexpr.c]
+
+            if is_pti:
+                # Reorder the column list to reposition the timecode and sequence columns.
+                df_column_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
+
+            source_tbl_name = UtilFuncs._extract_table_name(df._table_name)
+            from_schema_name = UtilFuncs._extract_db_name(df._table_name)
+
+            df_utils._insert_all_from_table(table_name, source_tbl_name, df_column_list,
+                                            to_schema_name=schema_name,
+                                            from_schema_name=from_schema_name,
+                                            temporary=temporary)
+
+            # When a table name conflict is present, delete the source table after the temporary
+            # table is created, then rename the temporary table to the destination table name.
+            if is_conflict and if_exists.lower() == 'replace':
+                tbl_name = dt_obj._get_fully_qualified_table_name()
+                UtilFuncs._drop_table(tbl_name)
+                _rename_table(table_name, dest_table_name)
+
+    except (TeradataMlException, ValueError, TypeError):
+        raise
+    except Exception as err:
+        raise TeradataMlException(Messages.get_message(MessageCodes.COPY_TO_SQL_FAIL) + str(err),
+                                  MessageCodes.COPY_TO_SQL_FAIL) from err
+
+
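The `_check_table_exists` call near the top of the function is what gives `if_exists='fail'` its teeth. A minimal, hedged sketch of that behavior (table name is hypothetical; assumes a connected session):

```python
# Sketch only: the second save targets an existing table with
# if_exists='fail', so a TeradataMlException is expected.
import pandas as pd
from teradataml.dataframe.copy_to import copy_to_sql
from teradataml.common.exceptions import TeradataMlException

pdf = pd.DataFrame({"id": [1, 2], "val": [10.0, 20.0]})
copy_to_sql(pdf, table_name="copyto_fail_demo")            # creates the table
try:
    copy_to_sql(pdf, table_name="copyto_fail_demo", if_exists="fail")
except TeradataMlException as err:
    print("raised as expected:", err)
```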
+def _check_table_name_conflict(df, table_name):
+    """
+    Check whether the destination "table_name" matches any of the teradataml DataFrame's parent nodes.
+    This function traverses the DAG graph from the child node to the root node and checks for a table name conflict.
+
+    PARAMETERS:
+        df:
+            Required Argument.
+            Specifies the teradataml DataFrame object to be checked.
+            Types: teradataml.dataframe.dataframe.DataFrame
+
+        table_name:
+            Required Argument.
+            Specifies the name of the table to be created in Vantage.
+            Types: String
+
+    RETURNS:
+        A boolean value representing the presence of a conflict.
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        >>> df = DataFrame("sales")
+        >>> table_name = "destination_table"
+        >>> _check_table_name_conflict(df, table_name)
+    """
+    aed_obj = AedUtils()
+    # Check if the parent node count is greater than 0.
+    if aed_obj._aed_get_parent_node_count(df._nodeid) > 0:
+        # Let's check whether "table_name" matches any of the parent nodes' table names.
+        # Get the current table node id.
+        node_id = df._nodeid
+        while node_id:
+
+            # Get the parent node id using the current table node id.
+            parent_node_id = aed_obj._aed_get_parent_nodeids(node_id)
+
+            if parent_node_id:
+                # Check whether "table_name" matches the parent "table_name".
+                # If the table name matches, then return 'True'.
+                # Otherwise, traverse the graph from the current node to the topmost root node.
+                if table_name in aed_obj._aed_get_source_tablename(parent_node_id[0]):
+                    return True
+                else:
+                    node_id = parent_node_id[0]
+            else:
+                # When parent_node_id is empty, return 'False'.
+                return False
+    return False
+
+
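In practice the helper answers one question: does the destination name already appear in the DataFrame's lineage? A hedged illustration (assumes the 'sales' example table is loaded and a connection is established; the exact result depends on the DAG actually built):

```python
# Sketch only: a projection of 'sales' keeps 'sales' in its lineage, so a
# 'replace' into the same name is flagged as a conflict.
from teradataml.dataframe.dataframe import DataFrame

df = DataFrame("sales").select(["accounts", "Jan"])
print(_check_table_name_conflict(df, "sales"))        # expected: True
print(_check_table_name_conflict(df, "other_table"))  # expected: False
```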
+def _get_sqlalchemy_table_from_tdmldf(df, meta):
+    """
+    This is an internal function used to generate an SQLAlchemy Table
+    object for the underlying table/view of a DataFrame.
+
+    PARAMETERS:
+        df:
+            The teradataml DataFrame to generate the SQLAlchemy.Table object for.
+
+        meta:
+            The SQLAlchemy.MetaData object.
+
+    RETURNS:
+        SQLAlchemy.Table
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        >>> con = get_connection()
+        >>> df = DataFrame('admissions_train')
+        >>> meta = sqlalchemy.MetaData()
+        >>> table = _get_sqlalchemy_table_from_tdmldf(df, meta)
+
+    """
+    con = get_connection()
+    db_schema = UtilFuncs._extract_db_name(df._table_name)
+    db_table_name = UtilFuncs._extract_table_name(df._table_name)
+
+    return Table(db_table_name, meta, schema=db_schema, autoload_with=get_context())
+
+
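The helper is just SQLAlchemy table reflection against the current context. An equivalent standalone sketch, assuming a connected session ('admissions_train' comes from the docstring examples):

```python
# Sketch only: reflect an existing table into a SQLAlchemy Table object and
# list its column names.
from sqlalchemy import MetaData, Table
from teradataml.context.context import get_context

meta = MetaData()
tbl = Table("admissions_train", meta, autoload_with=get_context())
print([c.name for c in tbl.columns])
```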
+def _get_index_labels(df, index_label):
+    """
+    Internal function to construct a list of labels for the indices to be saved from a Pandas DataFrame,
+    based on user input and information from the DataFrame.
+
+    PARAMETERS:
+        df:
+            The Pandas input DataFrame.
+
+        index_label:
+            The user-provided label(s) for the indices.
+
+    RAISES:
+        None
+
+    RETURNS:
+        A tuple containing the list of labels for the indices to add as columns,
+        and a list of their corresponding dtypes.
+
+    EXAMPLES:
+        _get_index_labels(df, index_label)
+    """
+    default_index_label = 'index_label'
+    default_level_prefix = 'level_'
+    level_cnt = 0
+
+    is_multi_index = isinstance(df.index, pd.MultiIndex)
+    ind_types = [level.dtype for level in df.index.levels] if is_multi_index else [df.index.dtype]
+
+    ind_names = []
+    if index_label:
+        ind_names = [index_label] if isinstance(index_label, str) else index_label
+    else:
+        for name in df.index.names:
+            if name not in ('', None):
+                ind_names.append(name)
+            else:
+                if is_multi_index:
+                    ind_names.append(default_level_prefix + str(level_cnt))
+                    level_cnt = level_cnt + 1
+                else:
+                    df_columns = _get_pd_df_column_names(df)
+                    label = default_level_prefix + str(level_cnt) if default_index_label in df_columns else default_index_label
+                    ind_names.append(label)
+
+    return ind_names, ind_types
+
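The defaulting rules can be seen with plain pandas: a named index keeps its name, an unnamed standard index falls back to 'index_label' (or 'level_0' when a column already uses that name), and unnamed MultiIndex levels become 'level_0', 'level_1', and so on. A minimal sketch; the expected outputs in the comments follow the logic above:

```python
# Sketch only: mirrors the label fallback logic in _get_index_labels.
import pandas as pd

pdf = pd.DataFrame({"x": [1, 2]})                       # unnamed RangeIndex
# _get_index_labels(pdf, None)  -> (['index_label'], [dtype('int64')])

pdf2 = pd.DataFrame({"index_label": [1, 2]})            # name collision
# _get_index_labels(pdf2, None) -> (['level_0'], [dtype('int64')])

mi = pd.MultiIndex.from_tuples([(1, "a"), (2, "b")])
pdf3 = pd.DataFrame({"x": [1, 2]}, index=mi)            # unnamed levels
# _get_index_labels(pdf3, None) -> (['level_0', 'level_1'], [...])
```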
+def _validate_partition_arguments(partition_by=None,
+                                  partition_by_case=None,
+                                  partition_by_range=None,
+                                  sub_partition=None):
+    """
+    DESCRIPTION:
+        Internal function to validate the partition_by arguments.
+
+    PARAMETERS:
+        partition_by:
+            Optional Argument.
+            Specifies the columns on which PARTITION BY should be created.
+            Types: str or ColumnExpression
+
+        partition_by_case:
+            Optional Argument.
+            Specifies different cases to partition the index.
+            Types: str or ColumnExpression or tuple of ColumnExpression, str
+
+        partition_by_range:
+            Optional Argument.
+            Specifies the range of values of Date columns on which the partition is to be created.
+            Types: str or ColumnExpression
+
+        sub_partition:
+            Optional Argument.
+            Specifies the details to subpartition the main partition according to the value provided.
+            Types: int or Teradata Interval datatypes
+
+    RETURNS:
+        None
+
+    RAISES:
+        TeradataMlException
+
+    EXAMPLES:
+        >>> _validate_partition_arguments(partition_by='col1')
+        >>> _validate_partition_arguments(partition_by_case=(df.col1 < 100, df.col1 < 1000))
+    """
+    # Validate the partition arguments.
+    arg_matrix = []
+    arg_matrix.append(['partition_by', partition_by, True, (str, ColumnExpression), True])
+    arg_matrix.append(['partition_by_case', partition_by_case, True, (ColumnExpression, str, _TupleOf((str, ColumnExpression))), True])
+    arg_matrix.append(['partition_by_range', partition_by_range, True, (ColumnExpression, str), True])
+    arg_matrix.append(['sub_partition', sub_partition, True, (int, TeradataTypes.TD_RANGE_N_CLAUSE_TYPES.value), True])
+
+    # Validate argument types.
+    _Validators._validate_function_arguments(arg_matrix)
+
+    # Validate mutually exclusive arguments.
+    _Validators._validate_mutually_exclusive_argument_groups({"partition_by": partition_by},
+                                                             {"partition_by_case": partition_by_case},
+                                                             {"partition_by_range": partition_by_range})
+
|
1016
|
+
def _build_partition_expression(partition_by=None,
                                partition_by_case=None,
                                partition_by_range=None,
                                sub_partition=None):
    """
    DESCRIPTION:
        Internal function to build the partitioning expression for the table.

    PARAMETERS:
        partition_by:
            Optional argument.
            Specifies the columns on which PARTITION BY should be created.
            Types: str or ColumnExpression

        partition_by_case:
            Optional argument.
            Specifies different cases to partition the index.
            Types: str or ColumnExpression or tuple of ColumnExpression, str

        partition_by_range:
            Optional argument.
            Specifies the range of values of Date columns on which the partition is to be created.
            Types: str or ColumnExpression

        sub_partition:
            Optional argument.
            Specifies the details to subpartition the main partition according to the value provided.
            Types: int or Teradata Interval datatypes

    RAISES:
        None

    RETURNS:
        Strings containing the partitioning expression and partition function.

    EXAMPLES:
        >>> _build_partition_expression(partition_by='col1')
        >>> _build_partition_expression(partition_by_case=(df.col1 < 100, df.col1 < 1000))

    """
    partition_exp = None
    partition_fn = None
    # Check if the partition_by expression is a ColumnExpression;
    # if so, compile it to a string.
    if partition_by:
        partition_exp = partition_by.compile() if isinstance(partition_by, ColumnExpression) \
            else partition_by

    # Check if partition_by_case is a ColumnExpression or string:
    # if string, join it to the partition_by expression;
    # if ColumnExpression, compile it to a string and join it to the partition_by expression;
    # if tuple, compile each expression to a string and join them to the partition_by expression.
    if partition_by_case:
        partition_fn = "CASE_N"
        partition_by_case = [partition_by_case] if isinstance(partition_by_case, (str, ColumnExpression)) \
            else partition_by_case
        partition_exp = "{}, NO CASE, UNKNOWN".format(
            ", ".join(str(exp.compile()) if isinstance(exp, ColumnExpression) else str(exp)
                      for exp in partition_by_case))

    # Check if partition_by_range is a ColumnExpression or string;
    # if ColumnExpression, compile it to a string.
    if partition_by_range:
        partition_fn = "RANGE_N"
        sub_partition_clause = ""
        if isinstance(partition_by_range, ColumnExpression):
            partition_by_range = partition_by_range.compile()

        # Check if sub_partition is provided;
        # if so, compile the EACH clause for RANGE_N.
        # If sub_partition is an int, convert it to a string and add it to the clause.
        # If sub_partition is a TeradataTypes.TD_RANGE_N_CLAUSE_TYPES,
        # convert it to a string, extract the precision and add it to the clause.
        if sub_partition:
            sub_partition_clause = (
                f" EACH {str(sub_partition)}"
                if isinstance(sub_partition, int)
                else f" EACH INTERVAL '{sub_partition.precision}' {str(sub_partition).split(maxsplit=1)[1]}")

        partition_exp = "{0}{1}".format(partition_by_range, sub_partition_clause)
    # Return the partition_by expression and partition function.
    return partition_exp, partition_fn

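# A hedged illustration (not part of the library) of the pair returned by
# _build_partition_expression; the exact compiled text depends on how
# ColumnExpression.compile() renders the predicates, so the strings below
# are assumptions:
#
#   _build_partition_expression(partition_by='store_id')
#   # -> ('store_id', None)
#
#   _build_partition_expression(partition_by_case=(df.col1 < 100, df.col1 < 1000))
#   # -> ('col1 < 100, col1 < 1000, NO CASE, UNKNOWN', 'CASE_N')
#
#   _build_partition_expression(
#       partition_by_range="order_date BETWEEN DATE '2020-01-01' AND DATE '2020-12-31'",
#       sub_partition=7)
#   # -> ("order_date BETWEEN DATE '2020-01-01' AND DATE '2020-12-31' EACH 7", 'RANGE_N')
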
def _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
                                  timezero_date, primary_time_index_name, columns_list,
                                  sequence_column, seq_max, types, index, index_label):
    """
    This is an internal function used to validate the PTI part of a copy request.
    DataFrame, connection & related parameters are checked.
    Saving to Vantage proceeds only when validation returns True.

    PARAMETERS:
        df:
            The DataFrame (Pandas or teradataml) object to be saved.

        timecode_column:
            The column in the DataFrame that reflects the form of the timestamp
            data in the time series.
            Type: String

        timebucket_duration:
            A duration that serves to break up the time continuum in
            the time series data into discrete groups or buckets.
            Type: String

        timezero_date:
            Specifies the earliest time series data that the PTI table will accept.
            Type: String

        primary_time_index_name:
            A name for the Primary Time Index (PTI).
            Type: String

        columns_list:
            A list of one or more PTI table column names.
            Type: String or list of Strings

        sequence_column:
            Specifies a column of type Integer with sequences implying that the
            time series data readings are not unique.
            If not specified, the time series data are assumed to be unique in time.
            Type: String

        seq_max:
            Specifies the maximum number of sensor data rows that can have the
            same timestamp. Can be used when 'sequenced' is True.
            Accepted range: 1 - 2147483647.
            Type: int

        types:
            Dictionary specifying column-name to teradatasqlalchemy type-mapping.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column or not.
            Type: bool

        index_label:
            Column label for index column(s).
            Type: String

    RETURNS:
        True, when all parameters are valid.

    RAISES:
        TeradataMlException, when parameter validation fails.

    EXAMPLES:
        _validate_pti_copy_parameters(df = my_df, timecode_column = 'ts', timebucket_duration = 'HOURS(2)')
    """
    if isinstance(df, pd.DataFrame):
        df_columns = _get_pd_df_column_names(df)
    else:
        df_columns = [col.name for col in df._metaexpr.c]

    awu = AnalyticsWrapperUtils()
    awu_matrix = []

    # The arguments added to awu_matrix are:
    # arg_name, arg, is_optional, acceptable types
    # The value for is_optional is set to False when the argument
    # a) is a required argument
    # b) is not allowed to be None, even if it is optional
    awu_matrix.append(['timecode_column', timecode_column, False, (str)])
    awu_matrix.append(['columns_list', columns_list, True, (str, list)])
    awu_matrix.append(['timezero_date', timezero_date, True, (str)])
    awu_matrix.append(['timebucket_duration', timebucket_duration, True, (str)])
    awu_matrix.append(['primary_time_index_name', primary_time_index_name, True, (str)])
    awu_matrix.append(['sequence_column', sequence_column, True, (str)])
    awu_matrix.append(['seq_max', seq_max, True, (int)])

    # Validate types.
    awu._validate_argument_types(awu_matrix)

    # Validate arguments are not empty.
    awu._validate_input_columns_not_empty(timecode_column, 'timecode_column')
    awu._validate_input_columns_not_empty(columns_list, 'columns_list')
    awu._validate_input_columns_not_empty(timezero_date, 'timezero_date')
    awu._validate_input_columns_not_empty(timebucket_duration, 'timebucket_duration')
    awu._validate_input_columns_not_empty(sequence_column, 'sequence_column')

    # Validate all the required arguments, and the optional arguments when not None.
    # First the timecode_column.
    _validate_column_in_list_of_columns('df', df_columns, timecode_column, 'timecode_column')
    # Check the type of timecode_column.
    _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES.value,
                          types, index, index_label)

    # timezero date
    _validate_timezero_date(timezero_date)

    # timebucket duration
    _Validators._validate_timebucket_duration(timebucket_duration)

    # Validate sequence_column.
    if sequence_column is not None:
        _validate_column_in_list_of_columns('df', df_columns, sequence_column, 'sequence_column')
        # Check the type of sequence_column.
        _validate_column_type(df, sequence_column, 'sequence_column',
                              PTITableConstants.VALID_SEQUENCE_COL_DATATYPES.value, types, index, index_label)

    # Validate seq_max (the accepted range is inclusive on both ends).
    if seq_max is not None and (seq_max < 1 or seq_max > 2147483647):
        raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(seq_max, 'seq_max', '1 <= integer <= 2147483647'),
                                  MessageCodes.INVALID_ARG_VALUE)

    # Validate columns_list.
    _validate_columns_list('df', df_columns, columns_list)
    if isinstance(columns_list, str):
        columns_list = [columns_list]

    # Either one or both of timebucket_duration and columns_list must be specified.
    if timebucket_duration is None and (columns_list is None or len(columns_list) == 0):
        raise TeradataMlException(
            Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, 'timebucket_duration', 'columns_list'),
            MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)

def _validate_columns_list(df, df_columns, columns_list):
    """
    Internal function to validate the columns list specified when creating a
    Primary Time Index (PTI) table.

    PARAMETERS:
        df:
            Name of the DataFrame to which the column being validated
            does or should belong.

        df_columns:
            List of columns in the DataFrame.

        columns_list:
            The column or list of columns.
            Type: String or list of Strings

    RETURNS:
        True if the column or list of columns is valid.

    RAISES:
        Raises TeradataMlException on validation failure.
    """
    if columns_list is None:
        return True

    # Normalize a single column name to a list.
    if isinstance(columns_list, str):
        columns_list = [columns_list]

    for col in columns_list:
        _validate_column_in_list_of_columns(df, df_columns, col, 'columns_list')

    return True

def _validate_column_in_list_of_columns(df, df_columns, col, col_arg):
    """
    Internal function to validate the arguments used to specify
    a column name in a DataFrame.

    PARAMETERS:
        df:
            Name of the DataFrame to which the column being validated
            does or should belong.

        df_columns:
            List of columns in the DataFrame.

        col:
            Column to be validated.

        col_arg:
            Name of the argument used to specify the column name.

    RETURNS:
        True, if the column name is valid.

    RAISES:
        TeradataMlException, if the column name is invalid.
    """
    if col not in df_columns:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(col,
                                                                                     col_arg,
                                                                                     df,
                                                                                     'DataFrame'),
            MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND)

    return True

def _validate_column_type(df, col, col_arg, expected_types, types=None, index=False, index_label=None):
    """
    Internal function to validate the type of an input DataFrame column against
    a list of expected types.

    PARAMETERS:
        df:
            Input DataFrame (Pandas or teradataml) which has the column to be tested
            for type.

        col:
            The column in the input DataFrame to be tested for type.

        col_arg:
            The name of the argument used to pass the column name.

        expected_types:
            Specifies a list of teradatasqlalchemy datatypes that the column is
            expected to be of.

        types:
            Dictionary specifying column-name to teradatasqlalchemy type-mapping.

    RETURNS:
        True, when the column is of an expected type.

    RAISES:
        TeradataMlException, when the column is not of one of the expected types.

    EXAMPLES:
        _validate_column_type(df, timecode_column, 'timecode_column', PTITableConstants.VALID_TIMECODE_DATATYPES, types)
    """
    # Check if the column is being cast to a valid type via the 'types' mapping.
    if types is not None and col in types:
        if not any(isinstance(types[col], expected_type) for expected_type in expected_types):
            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
                                      format(col_arg, types[col], ' or '.join(expected_type.__visit_name__
                                                                              for expected_type in expected_types)),
                                      MessageCodes.INVALID_COLUMN_TYPE)
    # Else we need to copy without any casting.
    elif isinstance(df, pd.DataFrame):
        t = _get_sqlalchemy_mapping_types(str(df.dtypes[col]))
        if t not in expected_types:
            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
                                      format(col_arg, t, ' or '.join(expected_type.__visit_name__
                                                                     for expected_type in expected_types)),
                                      MessageCodes.INVALID_COLUMN_TYPE)
    elif not any(isinstance(df[col].type, t) for t in expected_types):
        raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).
                                  format(col_arg, df[col].type, ' or '.join(expected_type.__visit_name__
                                                                            for expected_type in expected_types)),
                                  MessageCodes.INVALID_COLUMN_TYPE)

    return True

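# A hedged sketch (not part of the library) of how _validate_column_type is
# driven, assuming VALID_TIMECODE_DATATYPES includes TIMESTAMP and that no
# 'types' override is given, so the inferred pandas dtype decides:
#
#   df = pd.DataFrame({'ts': pd.to_datetime(['2020-01-01']), 'seq': [1]})
#   _validate_column_type(df, 'ts', 'timecode_column',
#                         PTITableConstants.VALID_TIMECODE_DATATYPES.value)
#   # -> True: datetime64[ns] maps to TIMESTAMP, an accepted timecode type.
#   _validate_column_type(df, 'seq', 'timecode_column',
#                         PTITableConstants.VALID_TIMECODE_DATATYPES.value)
#   # -> raises TeradataMlException(INVALID_COLUMN_TYPE): int64 maps to BIGINT.
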
def _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table, types, index=None,
                         index_label=None, partition_expression=None, partition_function=None):
    """
    This is an internal function used to construct a SQLAlchemy Table object.
    This function checks appropriate flags and supports creation of Teradata
    specific Table constructs such as Volatile/Primary Index tables.

    PARAMETERS:
        df:
            The teradataml or Pandas DataFrame object to be saved.

        table_name:
            Name of the SQL table.

        con:
            A SQLAlchemy connectable (engine/connection) object.

        primary_index:
            Creates Teradata Table(s) with a Primary Index column if specified.

        temporary:
            Flag specifying whether the SQL table to be created is Volatile or not.

        schema_name:
            Specifies the name of the SQL schema in the database to write to.

        set_table:
            A flag specifying whether to create a SET table or a MULTISET table.
            When True, an attempt to create a SET table is made.
            When False, an attempt to create a MULTISET table is made.

        partition_expression:
            Specifies the partitioning expression to be used for the PARTITION BY clause.

        partition_function:
            Specifies the partitioning function to be used with the PARTITION BY clause.

        types:
            Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column(s) or not.

        index_label:
            Column label(s) for index column(s).

    RETURNS:
        SQLAlchemy Table

    RAISES:
        N/A

    EXAMPLES:
        _create_table_object(df = my_df, table_name = 'test_table', con = tdconnection, primary_index = None,
                             temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
        _create_table_object(df = csv_filepath, table_name = 'test_table', con = tdconnection, primary_index = None,
                             temporary = True, schema_name = schema, set_table=False, types = types, index = True, index_label = None)
    """
    # Dictionary to append special flags; can be extended to add Fallback, Journalling, Log etc.
    post_params = {}
    prefix = []
    pti = post(opts=post_params)

    if temporary is True:
        pti = pti.on_commit(option='preserve')
        prefix.append('VOLATILE')

    if not set_table:
        prefix.append('multiset')
    else:
        prefix.append('set')

    meta = MetaData()
    meta.bind = con

    if isinstance(df, pd.DataFrame):
        col_names, col_types = _extract_column_info(df, types, index, index_label)
    elif isinstance(df, str):
        col_names, col_types = _extract_column_info(df, types)
    else:
        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
        if types is not None:
            # Use the user-provided type when given; keep the default for columns
            # not covered when only partial types are provided.
            col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]

    if primary_index is not None:
        if isinstance(primary_index, list):
            pti = pti.primary_index(unique=False, cols=primary_index)
        elif isinstance(primary_index, str):
            pti = pti.primary_index(unique=False, cols=[primary_index])
    else:
        pti = pti.no_primary_index()

    # Partitioning expression and function.
    if partition_expression:
        pti = pti.partition_by(partition_expression=partition_expression,
                               partition_fn=partition_function)

    # Create the default Table construct with the parameter dictionary.
    table = Table(table_name, meta,
                  *(Column(col_name, col_type)
                    for col_name, col_type in
                    zip(col_names, col_types)),
                  teradatasql_post_create=pti,
                  prefixes=prefix,
                  schema=schema_name
                  )

    return table

def _create_pti_table_object(df, con, table_name, schema_name, temporary, primary_time_index_name,
                             timecode_column, timezero_date, timebucket_duration,
                             sequence_column, seq_max, columns_list, set_table, types, index=None, index_label=None):
    """
    This is an internal function used to construct a SQLAlchemy Table object.
    This function checks appropriate flags and supports creation of Teradata
    specific Table constructs such as Volatile and Primary Time Index tables.

    PARAMETERS:
        df:
            The teradataml or Pandas DataFrame object to be saved.

        con:
            A SQLAlchemy connectable (engine/connection) object.

        table_name:
            Name of the SQL table.

        schema_name:
            Specifies the name of the SQL schema in the database to write to.

        temporary:
            Flag specifying whether the SQL table to be created is Volatile or not.

        primary_time_index_name:
            A name for the Primary Time Index (PTI).

        timecode_column:
            The column in the DataFrame that reflects the form of the timestamp
            data in the time series.

        timezero_date:
            Specifies the earliest time series data that the PTI table will accept.

        timebucket_duration:
            A duration that serves to break up the time continuum in
            the time series data into discrete groups or buckets.

        sequence_column:
            Specifies a column with sequences implying that the time series data
            readings are not unique. If not specified, the time series data are
            assumed to be unique.

        seq_max:
            Specifies the maximum number of sensor data rows that can have the
            same timestamp. Can be used when 'sequenced' is True.

        columns_list:
            A list of one or more PTI table column names.

        set_table:
            A flag specifying whether to create a SET table or a MULTISET table.
            When True, an attempt to create a SET table is made.
            When False, an attempt to create a MULTISET table is made.

        types:
            Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column or not.

        index_label:
            Column label for index column(s).

    RETURNS:
        SQLAlchemy Table

    RAISES:
        N/A

    EXAMPLES:
        _create_pti_table_object(df = my_df, table_name = 'test_table', con = tdconnection,
                                 timecode_column = 'ts', columns_list = ['user_id', 'location'])

    """
    meta = MetaData()

    if isinstance(df, pd.DataFrame):
        col_names, col_types = _extract_column_info(df, types, index, index_label)
        timecode_datatype = col_types[col_names.index(timecode_column)]()
    else:
        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
        if types is not None:
            # Use the user-provided type when given; keep the default for columns
            # not covered when only partial types are provided.
            col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
        timecode_datatype = df[timecode_column].type

    # Remove the timecode and sequence columns from col_names and col_types
    # since the required columns will be created automatically.
    if timecode_column in col_names:
        ind = col_names.index(timecode_column)
        col_names.pop(ind)
        col_types.pop(ind)

    if sequence_column is not None and sequence_column in col_names:
        ind = col_names.index(sequence_column)
        col_names.pop(ind)
        col_types.pop(ind)

    # Dictionary to append special flags; can be extended to add Fallback, Journalling, Log etc.
    post_params = {}
    prefix = []
    pti = post(opts=post_params)

    # Create the Table object with the appropriate Primary Time Index/prefix for volatile.
    if temporary:
        pti = pti.on_commit(option='preserve')
        prefix.append('VOLATILE')

    if not set_table:
        prefix.append('multiset')
    else:
        prefix.append('set')

    pti = pti.primary_time_index(timecode_datatype,
                                 name=primary_time_index_name,
                                 timezero_date=timezero_date,
                                 timebucket_duration=timebucket_duration,
                                 sequenced=True if sequence_column is not None else False,
                                 seq_max=seq_max,
                                 cols=columns_list)

    table = Table(table_name, meta,
                  *(Column(col_name, col_type)
                    for col_name, col_type in
                    zip(col_names, col_types)),
                  teradatasql_post_create=pti,
                  prefixes=prefix,
                  schema=schema_name
                  )

    return table

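# For orientation only (not output captured from this module; table and column
# names are illustrative): the object built above corresponds to DDL of
# roughly this shape, with TD_TIMECODE/TD_SEQNO columns generated by Vantage:
#
#   CREATE MULTISET TABLE ocean_buoys (
#       buoy_id INTEGER,
#       temperature FLOAT
#   )
#   PRIMARY TIME INDEX (TIMESTAMP(6), DATE '2020-01-01', HOURS(2),
#                       COLUMNS(buoy_id), SEQUENCED(20))
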
def _create_temporal_table(df, table_name, con, primary_index, schema_name,
                           valid_time_columns, derived_column, types, index=None, index_label=None):
    """
    This is an internal function used to construct a CREATE TABLE statement for a Teradata temporal table.
    Supports creation of tables with a PERIOD FOR derived column using the specified valid time columns.

    PARAMETERS:
        df:
            Required Argument.
            The teradataml or Pandas DataFrame object to be saved.
            Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame

        table_name:
            Required Argument.
            Name of the SQL table.
            Types: String

        con:
            Optional Argument.
            A SQLAlchemy connectable (engine/connection) object.
            Types: SQLAlchemy Engine or Connection

        primary_index:
            Optional Argument.
            Creates Teradata Table(s) with a Primary Index column if specified.
            Types: String or list of Strings

        schema_name:
            Optional Argument.
            Specifies the name of the SQL schema in the database to write to.
            Types: String

        valid_time_columns:
            Required Argument.
            Specifies a tuple of two column names representing the temporal validity period.
            Types: tuple of Strings or str

        derived_column:
            Optional Argument.
            Specifies the name of the derived PERIOD FOR column to be created.
            Types: String

        types:
            Optional Argument.
            Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
            Types: dict

        index:
            Optional Argument.
            Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
            Types: Boolean

        index_label:
            Optional Argument.
            Column label(s) for index column(s).
            Types: String or list of Strings

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        _create_temporal_table(
            df=my_df,
            table_name='temporal_table',
            con=td_connection,
            primary_index=['id'],
            schema_name='my_schema',
            valid_time_columns=('start_date', 'end_date'),
            derived_column='validity_period',
            types={'id': INTEGER, 'start_date': DATE, 'end_date': DATE},
            index=False,
            index_label=None
        )

    """

    # Extract column names and types.
    if isinstance(df, pd.DataFrame):
        col_names, col_types = _extract_column_info(df, types, index, index_label)
    else:
        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
        if types is not None:
            col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]

    columns_clause_ = []
    # Ensure all col_types are instances, not classes.
    for i, col_type in enumerate(col_types):
        if isinstance(col_type, type):
            col_types[i] = col_type()
    # Use col_names and col_types to build the columns clause.
    # Compile column types to strings using the dialect of the current connection.
    # Add NOT NULL to the valid_time_columns.
    for col_name, col_type in zip(col_names, col_types):
        col_def = '{} {}'.format(col_name, col_type.compile(dialect=td_dialect()))

        if col_name in valid_time_columns:
            col_def += ' NOT NULL'
            if isinstance(col_type, (PERIOD_DATE, PERIOD_TIMESTAMP)):
                col_def += ' AS VALIDTIME'
        columns_clause_.append(col_def)

    period_for_clause = []
    if isinstance(valid_time_columns, tuple):
        if derived_column is None:
            derived_column = "_".join(valid_time_columns)
        period_for_clause = ['PERIOD FOR {} ({}, {}) AS VALIDTIME'.format(
            derived_column, valid_time_columns[0], valid_time_columns[1])
        ]
    columns_clause = ",\n ".join(columns_clause_ + period_for_clause)

    # Prepare the primary index clause.
    if primary_index:
        primary_index_clause = "PRIMARY INDEX ({})".format(
            ", ".join(UtilFuncs._as_list(primary_index)))
    else:
        primary_index_clause = ""

    # Prepare the create table statement.
    table_name = UtilFuncs._get_qualified_table_name(schema_name, table_name) if\
        schema_name else table_name
    sql = """
    CREATE MULTISET TABLE {}
    (\n{}\n)\n{}
    """.format(table_name, columns_clause, primary_index_clause)
    try:
        execute_sql(sql)
    except Exception as err:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
            '\n' + str(err),
            MessageCodes.TABLE_OBJECT_CREATION_FAILED
        )

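# A hedged illustration (assumed names, not library output captured verbatim)
# of the statement _create_temporal_table assembles for
# valid_time_columns=('start_date', 'end_date'), derived_column='validity_period':
#
#   CREATE MULTISET TABLE my_schema.temporal_table
#   (
#    id INTEGER,
#    start_date DATE NOT NULL,
#    end_date DATE NOT NULL,
#    PERIOD FOR validity_period (start_date, end_date) AS VALIDTIME
#   )
#   PRIMARY INDEX (id)
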
def _rename_column(col_names, search_for, rename_to):
    """
    Internal function to rename a column in a list of columns of a Pandas DataFrame.

    PARAMETERS:
        col_names:
            Required Argument.
            The list of column names of the Pandas DataFrame.

        search_for:
            Required Argument.
            The column name that needs to be changed/renamed.

        rename_to:
            Required Argument.
            The column name that the 'search_for' column needs to be replaced with.

    RETURNS:
        The list of column names with the column renamed.

    EXAMPLES:
        cols = _rename_column(cols, 'col_1', 'new_col_1')
    """
    ind = col_names.index(search_for)
    col_names.pop(ind)
    col_names.insert(ind, rename_to)

    return col_names

def _rename_to_pti_columns(col_names, timecode_column, sequence_column,
                           timecode_column_index=None, sequence_column_index=None):
    """
    Internal function to generate a list of renamed columns of a Pandas DataFrame
    to match the PTI table column names in Vantage, or to revert any such changes made.

    PARAMETERS:
        col_names:
            The list of column names of the Pandas DataFrame.

        timecode_column:
            The column name that reflects the timecode column in the PTI table.

        sequence_column:
            The column name that reflects the sequence column in the PTI table.

        timecode_column_index:
            The index of the timecode column. When specified, it indicates that a
            reverse renaming operation is to be performed.

        sequence_column_index:
            The index of the sequence column. When specified, it indicates that a
            reverse renaming operation is to be performed.

    RETURNS:
        A list of renamed PTI related columns.

    EXAMPLES:
        cols = _rename_to_pti_columns(cols, timecode_column, sequence_column,
                                      timecode_column_index, sequence_column_index)
        cols = _rename_to_pti_columns(cols, timecode_column, sequence_column)
    """
    # Rename the timecode_column to or from its name in Vantage.
    if timecode_column_index is not None:
        col_names = _rename_column(col_names, PTITableConstants.TD_TIMECODE.value, timecode_column)
    else:
        col_names = _rename_column(col_names, timecode_column, PTITableConstants.TD_TIMECODE.value)

    # Rename the sequence_column to or from its name in Vantage.
    if sequence_column is not None:
        if sequence_column_index is not None:
            col_names = _rename_column(col_names, PTITableConstants.TD_SEQNO.value, sequence_column)
        else:
            col_names = _rename_column(col_names, sequence_column, PTITableConstants.TD_SEQNO.value)

    return col_names

def _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column, df_col_type_list=None):
    """
    Internal function to reorder the list of columns used to construct the 'INSERT INTO'
    statement as required when the target table is a PTI table.

    PARAMETERS:
        df_column_list:
            A list of column names for the columns in the DataFrame.

        timecode_column:
            The timecode column which should be moved to the first position.

        sequence_column:
            The sequence column which should be moved to the second position.

        df_col_type_list:
            Optionally reorder the list containing the types of the columns to match
            the reordering of df_column_list.

    RETURNS:
        A reordered list of column names for the columns in the DataFrame.
        If the optional types list is also specified, then a tuple of the reordered
        list of column names and the list of the column types.

    EXAMPLE:
        new_colname_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
        new_colname_list, new_type_list = _reorder_insert_list_for_pti(df_column_list, timecode_column,
                                                                       sequence_column, df_col_type_list)
    """
    # Reposition the timecode column (to the first position) and the sequence
    # column (to the second position) in df_column_list.
    timecode_column_index = df_column_list.index(timecode_column)
    df_column_list.insert(0, df_column_list.pop(timecode_column_index))
    if df_col_type_list is not None:
        df_col_type_list.insert(0, df_col_type_list.pop(timecode_column_index))

    if sequence_column is not None:
        sequence_column_index = df_column_list.index(sequence_column)
        df_column_list.insert(1, df_column_list.pop(sequence_column_index))
        if df_col_type_list is not None:
            # Insert at position 1, mirroring the column list reordering.
            df_col_type_list.insert(1, df_col_type_list.pop(sequence_column_index))

    if df_col_type_list is not None:
        return df_column_list, df_col_type_list
    else:
        return df_column_list

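# A small sketch (not part of the library) of the reorder, assuming a frame
# with columns ['value', 'ts', 'seq'] where 'ts' is the timecode and 'seq'
# the sequence column:
#
#   cols = ['value', 'ts', 'seq']
#   _reorder_insert_list_for_pti(cols, 'ts', 'seq')
#   # -> ['ts', 'seq', 'value'] : timecode first, sequence second,
#   #    matching the default column order of a PTI table in Vantage.
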
def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_pandas_df=False,
                                        is_pti=False, timecode_column=None, sequence_column=None, derived_column=None):
    """
    Internal function used to extract column information from two lists of SQLAlchemy ColumnExpression objects,
    and check whether the number of columns and their names match to determine table insertion compatibility.

    PARAMETERS:
        table1_col_object:
            Specifies a list/collection of SQLAlchemy ColumnExpression objects for the first table.

        table2_cols:
            Specifies a list of column names for the second table (teradataml DataFrame).

        is_pandas_df:
            Flag specifying whether the table objects to check are pandas DataFrames or not.
            Default: False
            Note: When this flag is True, table2_cols is passed as a tuple object of
                  ([column_names], [column_types])

        is_pti:
            Boolean flag indicating if the target table is a PTI table.

        timecode_column:
            timecode_column required to order the select expression for the insert.
            It should be the first column in the select expression.

        sequence_column:
            sequence_column required to order the select expression for the insert.
            It should be the second column in the select expression.

        derived_column:
            Specifies a derived column that is part of the table schema but not
            part of the insert.
            Types: String

    RETURNS:
        a) True, when insertion compatible (number of columns and their names match)
        b) False, otherwise

    RAISES:
        N/A

    EXAMPLES:
        _check_columns_insertion_compatible(table1.c, ['col1', 'col2'], False)
        _check_columns_insertion_compatible(table1.c, (['col1', 'col2'], [int, str]), True, True, 'ts', 'seq')
        _check_columns_insertion_compatible(table1.c, (['col1', 'col2'], [int, str]), True, True, 'ts', 'seq', 'derived_col')

    """
    table1_col_names, _ = UtilFuncs._extract_table_object_column_info(table1_col_object)
    table2_col_names = table2_cols[0] if is_pandas_df else table2_cols

    # Remove derived_column from table1_col_names if specified.
    if derived_column is not None and derived_column in table1_col_names:
        table1_col_names.remove(derived_column)

    # Check the number of columns.
    if len(table1_col_names) != len(table2_col_names):
        return False

    if is_pti is True:
        # Reposition the timecode column (to the first position) and the sequence
        # column (to the second position) with their names as generated by the
        # database, since that is the default position of these columns.
        table2_col_names = _reorder_insert_list_for_pti(table2_col_names, timecode_column, sequence_column)
        table2_col_names = _rename_to_pti_columns(table2_col_names, timecode_column, sequence_column)

    # Check the column names.
    for i in range(len(table1_col_names)):
        if table1_col_names[i] != table2_col_names[i]:
            return False

    # The number of columns and their names in both lists of ColumnExpressions match.
    return True

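# A hedged sketch (assumed table and constant values, not captured output) of
# the PTI branch of the check: given a SQLAlchemy Table 'tbl' with columns
# (TD_TIMECODE, TD_SEQNO, buoy_id) and a pandas frame with columns
# ['buoy_id', 'ts', 'seq']:
#
#   _check_columns_insertion_compatible(tbl.c, (['buoy_id', 'ts', 'seq'], []),
#                                       is_pandas_df=True, is_pti=True,
#                                       timecode_column='ts', sequence_column='seq')
#   # -> True: after the PTI reorder and rename the frame's columns become
#   #    ['TD_TIMECODE', 'TD_SEQNO', 'buoy_id'], matching the table.
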
def _extract_column_info(df, types=None, index=False, index_label=None):
    """
    This is an internal function used to extract column information for a DF,
    and map it to user-specified teradatasqlalchemy types, if specified,
    for Table creation.

    PARAMETERS:
        df:
            The Pandas DataFrame object to be saved.

        types:
            A python dictionary with column names and required types as key-value pairs.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column(s) or not.

        index_label:
            Column label(s) for index column(s).

    RETURNS:
        A tuple with the following elements:
        a) List of DataFrame column names
        b) List of equivalent teradatasqlalchemy column types

    RAISES:
        None

    EXAMPLES:
        _extract_column_info(df = my_df)
        _extract_column_info(df = my_df, types = {'id_col': INTEGER})

    """
    if isinstance(df, str):
        return list(types.keys()), list(types.values())

    col_names = _get_pd_df_column_names(df)

    # If the datatype is not specified, then check whether the datatype is
    # datetime64 with a timezone present; if so, map it to
    # TIMESTAMP(timezone=True), else map it according to the default mapping.
    col_types = [types.get(col_name) if types and col_name in types else
                 TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(df.dtypes.iloc[key])
                 and (df[col_name].dt.tz is not None)
                 else _get_sqlalchemy_mapping_types(str(df.dtypes.iloc[key]))
                 for key, col_name in enumerate(list(df.columns))]

    ind_names = []
    ind_types = []
    if index:
        ind_names, ind_types = _get_index_labels(df, index_label)
        # The datetime check here uses the index level dtypes (ind_types), not
        # the column dtypes; a tz-aware level carries its timezone on the dtype.
        ind_types = [types.get(ind_name) if types and ind_name in types
                     else TIMESTAMP(timezone=True) if pt.is_datetime64_ns_dtype(ind_types[key])
                     and getattr(ind_types[key], 'tz', None) is not None
                     else _get_sqlalchemy_mapping_types(str(ind_types[key]))
                     for key, ind_name in enumerate(ind_names)]

    return col_names + ind_names, col_types + ind_types

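# A minimal sketch (not part of the library) of the mapping performed above,
# assuming the default mappings and no 'types' override:
#
#   df = pd.DataFrame({'a': [1], 'b': [1.5], 'c': ['x']})
#   _extract_column_info(df)
#   # -> (['a', 'b', 'c'],
#   #     [BIGINT, FLOAT, VARCHAR(configure.default_varchar_size, charset='UNICODE')])
#   # i.e. int64 -> BIGINT, float64 -> FLOAT, object -> default-size VARCHAR.
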
def _insert_from_dataframe(df, con, schema_name, table_name, index, chunksize,
                           is_pti=False, timecode_column=None, sequence_column=None,
                           match_column_order=True):
    """
    This is an internal function used to sequentially extract column info from a DF,
    iterate its rows, and insert the rows manually.
    Used for insertions to Temporary Tables & Tables with a Pandas index.

    This uses DBAPI's executemany(), which is a batch insertion method.

    PARAMETERS:
        df:
            The Pandas DataFrame object to be saved.

        con:
            A SQLAlchemy connectable (engine/connection) object.

        schema_name:
            Name of the schema.

        table_name:
            Name of the table.

        index:
            Flag specifying whether to write Pandas DataFrame index as a column or not.

        chunksize:
            Specifies the number of rows to be loaded in a batch.
            Note:
                This argument is used only when argument "df" is a pandas DataFrame.

        is_pti:
            Boolean flag indicating if the table should be a PTI table.

        timecode_column:
            timecode_column required to order the select expression for the insert.
            It should be the first column in the select expression.

        sequence_column:
            sequence_column required to order the select expression for the insert.
            It should be the second column in the select expression.

        match_column_order:
            Specifies whether the column order of the df to be loaded matches the
            column order of the existing table or not.

    RETURNS:
        N/A

    RAISES:
        N/A

    EXAMPLES:
        _insert_from_dataframe(df = my_df, con = tdconnection, schema_name = None,
                               table_name = 'test_table', index = True, chunksize = 10000)
    """
    col_names = _get_pd_df_column_names(df)

    # Quoted, schema-qualified table name.
    table = '"{}"'.format(table_name)
    if schema_name is not None:
        table = '"{}".{}'.format(schema_name, table_name)

    try:

        if is_pti:
            # This is for non-index columns.
            col_names = _reorder_insert_list_for_pti(col_names, timecode_column, sequence_column)

        is_multi_index = isinstance(df.index, pd.MultiIndex)

        insert_list = []

        if not match_column_order:
            ins = "INSERT INTO {} {} VALUES {};".format(
                table,
                '(' + ', '.join(col_names) + ')',
                '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
                                                    if index is True else len(col_names))]) + ')')
        else:
            ins = "INSERT INTO {} VALUES {};".format(
                table,
                '(' + ', '.join(['?' for i in range(len(col_names) + len(df.index.names)
                                                    if index is True else len(col_names))]) + ')')

        # Empty the queryband buffer before the SQL call.
        UtilFuncs._set_queryband()
        rowcount = 0
        # Iterate rows of the DataFrame over the new re-ordered columns.
        for row_index, row in enumerate(df[col_names].itertuples(index=True)):
            ins_dict = ()
            for col_index, x in enumerate(col_names):
                ins_dict = ins_dict + (row[col_index + 1],)

            if index is True:
                ins_dict = ins_dict + row[0] if is_multi_index else ins_dict + (row[0],)

            insert_list.append(ins_dict)
            rowcount = rowcount + 1

            # chunksize corresponds to the max batch size for the DBAPI driver.
            # Insert the rows once the batch size reaches the max allowed.
            if rowcount == chunksize:
                # Batch insertion (using DBAPI's executemany) is used here to insert the list of rows.
                cur = execute_sql(ins, insert_list)
                if cur is not None:
                    cur.close()
                rowcount = 0
                insert_list.clear()

        # Insert any remaining rows.
        if rowcount > 0:
            cur = execute_sql(ins, insert_list)
            if cur is not None:
                cur.close()

    except Exception:
        raise

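# A brief illustration (assumed values, not captured output) of the
# parameterized statement the loop above batches through execute_sql:
#
#   table_name = 'test_table'; col_names = ['a', 'b']; index = False
#   # match_column_order=True  -> INSERT INTO "test_table" VALUES (?, ?);
#   # match_column_order=False -> INSERT INTO "test_table" (a, b) VALUES (?, ?);
#
# Each tuple appended to insert_list supplies one row of ?-bindings.
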
def _get_pd_df_column_names(df):
    """
    Internal function to return the names of columns in a Pandas DataFrame.

    PARAMETERS:
        df:
            The Pandas DataFrame to fetch the column names for.

    RETURNS:
        A list of Strings

    RAISES:
        None

    EXAMPLES:
        _get_pd_df_column_names(df = my_df)
    """
    return df.columns.tolist()

def _get_sqlalchemy_mapping(key):
    """
    This is an internal function used to return a SQLAlchemy type mapping
    for a given Pandas DataFrame column type.
    Used for Table object creation internally based on DF column info.

    For an unknown key, String (mapping to VARCHAR) is returned.

    PARAMETERS:
        key : String representing the Pandas type ('int64', 'object' etc.)

    RETURNS:
        SQLAlchemy Type Object (Integer, String, Float, DateTime etc.)

    RAISES:
        N/A

    EXAMPLES:
        _get_sqlalchemy_mapping(key = 'int64')
    """
    teradata_types_map = _get_all_sqlalchemy_mappings()

    if key in teradata_types_map.keys():
        return teradata_types_map.get(key)
    else:
        return VARCHAR(configure.default_varchar_size, charset='UNICODE')

def _get_all_sqlalchemy_mappings():
    """
    This is an internal function used to return a dictionary of all SQLAlchemy type mappings.
    It contains mappings from pandas data types to objects of SQLAlchemy Types.

    PARAMETERS:
        None

    RETURNS:
        dictionary { pandas_type : SQLAlchemy Type Object }

    RAISES:
        N/A

    EXAMPLES:
        _get_all_sqlalchemy_mappings()
    """
    teradata_types_map = {'int32': INTEGER(), 'int64': BIGINT(), 'Int64': INTEGER(),
                          'object': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
                          'O': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
                          'float64': FLOAT(), 'float32': FLOAT(), 'bool': BYTEINT(),
                          'datetime64': TIMESTAMP(), 'datetime64[ns]': TIMESTAMP(),
                          'datetime64[ns, UTC]': TIMESTAMP(timezone=True),
                          'timedelta64[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
                          'timedelta[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE')}

    return teradata_types_map

def _get_sqlalchemy_mapping_types(key):
    """
    This is an internal function used to return a SQLAlchemy type mapping
    for a given Pandas DataFrame column type.
    Used for Table object creation internally based on DF column info.

    For an unknown key, String (mapping to VARCHAR) is returned.

    PARAMETERS:
        key : String representing the Pandas type ('int64', 'object' etc.)

    RETURNS:
        SQLAlchemy Type (Integer, String, Float, DateTime etc.)

    RAISES:
        N/A

    EXAMPLES:
        _get_sqlalchemy_mapping_types(key = 'int64')
    """
    teradata_types_map = _get_all_sqlalchemy_types_mapping()

    if key in teradata_types_map.keys():
        return teradata_types_map.get(key)
    else:
        return VARCHAR(configure.default_varchar_size, charset='UNICODE')

def _get_all_sqlalchemy_types_mapping():
    """
    This is an internal function used to return a dictionary of all SQLAlchemy type mappings.
    It contains mappings from pandas data types to SQLAlchemy Types.

    PARAMETERS:
        None

    RETURNS:
        dictionary { pandas_type : SQLAlchemy Type }

    RAISES:
        N/A

    EXAMPLES:
        _get_all_sqlalchemy_types_mapping()
    """
    teradata_types_map = {'int32': INTEGER, 'int64': BIGINT,
                          'object': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
                          'O': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
                          'float64': FLOAT, 'float32': FLOAT, 'bool': BYTEINT,
                          'datetime64': TIMESTAMP, 'datetime64[ns]': TIMESTAMP,
                          'datetime64[ns, UTC]': TIMESTAMP(timezone=True),
                          'timedelta64[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE'),
                          'timedelta[ns]': VARCHAR(configure.default_varchar_size, charset='UNICODE')}

    return teradata_types_map

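# Note the asymmetry between the two mapping helpers above (an observation on
# this code, not documented behavior): _get_all_sqlalchemy_mappings returns
# instantiated types (INTEGER(), TIMESTAMP(), ...), while
# _get_all_sqlalchemy_types_mapping returns mostly bare classes (INTEGER,
# TIMESTAMP, ...), except for the parameterized VARCHAR and tz-aware TIMESTAMP
# entries, which must be instances. Callers such as _create_temporal_table
# therefore instantiate class entries before compiling them to DDL.
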
def _validate_timezero_date(timezero_date):
    """
    Internal function to validate the timezero_date specified when creating a
    Primary Time Index (PTI) table.

    PARAMETERS:
        timezero_date:
            The timezero_date passed to primary_time_index().

    RETURNS:
        True if the value is valid.

    RAISES:
        TeradataMlException when the value is invalid.

    EXAMPLE:
        _validate_timezero_date("DATE '2011-01-01'")
        _validate_timezero_date('2011-01-01') # Invalid
    """
    # Return True if it is not specified or is None, since it is optional.
    if timezero_date is None:
        return True

    pattern = re.compile(PTITableConstants.PATTERN_TIMEZERO_DATE.value)
    match = pattern.match(timezero_date)

    err_msg = Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(timezero_date,
                                                                          'timezero_date',
                                                                          "str of format DATE 'YYYY-MM-DD'")

    try:
        datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
    except (ValueError, AttributeError):
        raise TeradataMlException(err_msg,
                                  MessageCodes.INVALID_ARG_VALUE)

    # Looks like the value is valid.
    return True

def _validate_valid_time_columns(df, valid_time_columns, derived_column=None, types=None):
    """
    Internal function to validate that the columns specified in valid_time_columns
    exist in the DataFrame, are of type DATE or TIMESTAMP, and are of the same type.
    Also checks that the derived_column, if specified, is not present in the DataFrame.

    PARAMETERS:
        df:
            Required Argument.
            Specifies the Pandas or teradataml DataFrame object to be validated.
            Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame

        valid_time_columns:
            Required Argument.
            Specifies a tuple of two column names representing the temporal validity period.
            Types: tuple of Strings

        derived_column:
            Optional Argument.
            Specifies the name of the derived column that should not be
            present in the DataFrame.
            Types: String

        types:
            Optional Argument.
            Specifies a python dictionary with column-name(key) to column-type(value)
            mapping to create DataFrames.
            Types: dict

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        _validate_valid_time_columns(
            df=my_df,
            valid_time_columns=('start_date', 'end_date'),
            derived_column='validity_period',
            types={'start_date': DATE, 'end_date': DATE}
        )
    """
    df_columns = _get_pd_df_column_names(df) if isinstance(df, pd.DataFrame) else df.columns
    df_dtypes = (
        {
            col: _get_sqlalchemy_mapping_types(str(df.dtypes[col]))
            for col in df.dtypes.keys()
        }
        if isinstance(df, pd.DataFrame)
        else df._td_column_names_and_sqlalchemy_types
    )
    # If the types argument is provided, override the dtypes for those columns.
    if types is not None:
        for col, typ in types.items():
            if col in df_columns:
                df_dtypes[col] = typ

    if derived_column is not None and derived_column in df_columns:
        raise TeradataMlException(
            Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND).format(
                derived_column, 'derived_column', 'dataframe.', 'Provide value which is not part of DataFrame columns'
            ),
            MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND
        )
    # valid_time_columns can be a tuple of two column names or a single column name.
    if isinstance(valid_time_columns, tuple):
        if len(valid_time_columns) != 2:
            raise TeradataMlException(
                Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
                    valid_time_columns, 'valid_time_columns', 'tuple of two column names'
                ),
                MessageCodes.INVALID_ARG_VALUE
            )
        # Check if both columns are present in the DataFrame.
        for col in valid_time_columns:
            if col not in df_columns:
                raise TeradataMlException(
                    Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(
                        col, 'valid_time_columns', 'df', 'DataFrame'
                    ),
                    MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND
                )

        col1_type = df_dtypes[valid_time_columns[0]]
        col2_type = df_dtypes[valid_time_columns[1]]

        # When types are specified, ensure they are DATE or TIMESTAMP objects or classes.
        if not (
            isinstance(col1_type, TIMESTAMP) or isinstance(col1_type, DATE) or
            col1_type is TIMESTAMP or col1_type is DATE
        ):
            raise TeradataMlException(
                Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).format(
|
|
2388
|
+
'valid_time_columns',
|
|
2389
|
+
col1_type.__name__ if isinstance(col1_type, type)
|
|
2390
|
+
else col1_type.__class__.__name__, 'DATE or TIMESTAMP'
|
|
2391
|
+
),
|
|
2392
|
+
MessageCodes.INVALID_COLUMN_TYPE
|
|
2393
|
+
)
|
|
2394
|
+
# When types are specified, ensure they are DATE or TIMESTAMP objects or classes.
|
|
2395
|
+
if not (
|
|
2396
|
+
isinstance(col2_type, TIMESTAMP) or isinstance(col2_type, DATE) or
|
|
2397
|
+
col2_type is TIMESTAMP or col2_type is DATE
|
|
2398
|
+
):
|
|
2399
|
+
raise TeradataMlException(
|
|
2400
|
+
Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).format(
|
|
2401
|
+
'valid_time_columns',
|
|
2402
|
+
col2_type.__name__ if isinstance(col2_type, type)
|
|
2403
|
+
else col2_type.__class__.__name__, 'DATE or TIMESTAMP'
|
|
2404
|
+
),
|
|
2405
|
+
MessageCodes.INVALID_COLUMN_TYPE
|
|
2406
|
+
)
|
|
2407
|
+
|
|
2408
|
+
if type(col1_type) != type(col2_type):
|
|
2409
|
+
raise ValueError(
|
|
2410
|
+
Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
|
|
2411
|
+
valid_time_columns, 'valid_time_columns', 'both columns of same type (DATE or TIMESTAMP)'
|
|
2412
|
+
),
|
|
2413
|
+
MessageCodes.INVALID_ARG_VALUE
|
|
2414
|
+
)
|
|
2415
|
+
elif isinstance(valid_time_columns, str):
|
|
2416
|
+
col = valid_time_columns
|
|
2417
|
+
col_type = df_dtypes[col]
|
|
2418
|
+
|
|
2419
|
+
if col not in df_columns:
|
|
2420
|
+
raise TeradataMlException(
|
|
2421
|
+
Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(
|
|
2422
|
+
col, 'valid_time_columns', 'df', 'DataFrame'
|
|
2423
|
+
),
|
|
2424
|
+
MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND
|
|
2425
|
+
)
|
|
2426
|
+
# When types are specified, ensure they are PERIOD_DATE or PERIOD_TIMESTAMP objects or classes.
|
|
2427
|
+
if not (
|
|
2428
|
+
isinstance(col_type, PERIOD_TIMESTAMP) or isinstance(col_type, PERIOD_DATE) or
|
|
2429
|
+
col_type is PERIOD_TIMESTAMP or col_type is PERIOD_DATE
|
|
2430
|
+
):
|
|
2431
|
+
raise TeradataMlException(
|
|
2432
|
+
Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).format(
|
|
2433
|
+
'valid_time_columns',
|
|
2434
|
+
col_type.__name__ if isinstance(col_type, type)
|
|
2435
|
+
else col_type.__class__.__name__, 'PERIOD_DATE or PERIOD_TIMESTAMP'
|
|
2436
|
+
),
|
|
2437
|
+
MessageCodes.INVALID_COLUMN_TYPE
|
|
2438
|
+
)
|
|
2439
|
+
else:
|
|
2440
|
+
raise TeradataMlException(
|
|
2441
|
+
Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
|
|
2442
|
+
valid_time_columns, 'valid_time_columns', 'tuple of two column names or a single column name'
|
|
2443
|
+
),
|
|
2444
|
+
MessageCodes.INVALID_ARG_VALUE
|
|
2445
|
+
)
|
|
2446
|
+
|
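The same validation pattern translates directly to plain pandas: check membership first, then compare the two columns' types. A minimal sketch using pandas dtypes instead of teradataml's SQLAlchemy types; the DataFrame and column names are illustrative only:

```python
import pandas as pd

def check_valid_time(df, columns):
    # Membership check before any dtype lookup, as in the function above.
    start, end = columns
    for col in (start, end):
        if col not in df.columns:
            raise ValueError(f"column {col!r} not found in DataFrame")
    # Both ends of the validity period must share one type.
    if df.dtypes[start] != df.dtypes[end]:
        raise ValueError("both valid-time columns must share one type")

df = pd.DataFrame({'start_date': pd.to_datetime(['2011-01-01']),
                   'end_date': pd.to_datetime(['2012-01-01'])})
check_valid_time(df, ('start_date', 'end_date'))  # passes silently
```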