teradataml-20.0.0.8-py3-none-any.whl
This diff lists the contents of a publicly available package version released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
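Since a wheel is an ordinary ZIP archive, the file listing below can be reproduced locally with Python's standard library alone. A minimal sketch, assuming the wheel has been downloaded to the working directory under the filename shown above (note: `zipfile` reports file sizes, whereas the manifest below shows added/removed line counts):

```python
import zipfile

# Assumed: the wheel was downloaded locally, e.g. via
#   pip download teradataml==20.0.0.8 --no-deps
WHEEL = "teradataml-20.0.0.8-py3-none-any.whl"

with zipfile.ZipFile(WHEEL) as wheel:
    for info in wheel.infolist():
        # info.filename mirrors the paths in the manifest below;
        # info.file_size is the uncompressed size in bytes.
        print(f"{info.filename}\t{info.file_size} bytes")
```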
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +2762 -0
- teradataml/__init__.py +78 -0
- teradataml/_version.py +11 -0
- teradataml/analytics/Transformations.py +2996 -0
- teradataml/analytics/__init__.py +82 -0
- teradataml/analytics/analytic_function_executor.py +2416 -0
- teradataml/analytics/analytic_query_generator.py +1050 -0
- teradataml/analytics/byom/H2OPredict.py +514 -0
- teradataml/analytics/byom/PMMLPredict.py +437 -0
- teradataml/analytics/byom/__init__.py +16 -0
- teradataml/analytics/json_parser/__init__.py +133 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
- teradataml/analytics/json_parser/json_store.py +191 -0
- teradataml/analytics/json_parser/metadata.py +1666 -0
- teradataml/analytics/json_parser/utils.py +805 -0
- teradataml/analytics/meta_class.py +236 -0
- teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
- teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
- teradataml/analytics/sqle/__init__.py +128 -0
- teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
- teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
- teradataml/analytics/table_operator/__init__.py +11 -0
- teradataml/analytics/uaf/__init__.py +82 -0
- teradataml/analytics/utils.py +828 -0
- teradataml/analytics/valib.py +1617 -0
- teradataml/automl/__init__.py +5835 -0
- teradataml/automl/autodataprep/__init__.py +493 -0
- teradataml/automl/custom_json_utils.py +1625 -0
- teradataml/automl/data_preparation.py +1384 -0
- teradataml/automl/data_transformation.py +1254 -0
- teradataml/automl/feature_engineering.py +2273 -0
- teradataml/automl/feature_exploration.py +1873 -0
- teradataml/automl/model_evaluation.py +488 -0
- teradataml/automl/model_training.py +1407 -0
- teradataml/catalog/__init__.py +2 -0
- teradataml/catalog/byom.py +1759 -0
- teradataml/catalog/function_argument_mapper.py +859 -0
- teradataml/catalog/model_cataloging_utils.py +491 -0
- teradataml/clients/__init__.py +0 -0
- teradataml/clients/auth_client.py +137 -0
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/clients/pkce_client.py +481 -0
- teradataml/common/__init__.py +1 -0
- teradataml/common/aed_utils.py +2078 -0
- teradataml/common/bulk_exposed_utils.py +113 -0
- teradataml/common/constants.py +1669 -0
- teradataml/common/deprecations.py +166 -0
- teradataml/common/exceptions.py +147 -0
- teradataml/common/formula.py +743 -0
- teradataml/common/garbagecollector.py +666 -0
- teradataml/common/logger.py +1261 -0
- teradataml/common/messagecodes.py +518 -0
- teradataml/common/messages.py +262 -0
- teradataml/common/pylogger.py +67 -0
- teradataml/common/sqlbundle.py +764 -0
- teradataml/common/td_coltype_code_to_tdtype.py +48 -0
- teradataml/common/utils.py +3166 -0
- teradataml/common/warnings.py +36 -0
- teradataml/common/wrapper_utils.py +625 -0
- teradataml/config/__init__.py +0 -0
- teradataml/config/dummy_file1.cfg +5 -0
- teradataml/config/dummy_file2.cfg +3 -0
- teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
- teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
- teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
- teradataml/context/__init__.py +0 -0
- teradataml/context/aed_context.py +223 -0
- teradataml/context/context.py +1462 -0
- teradataml/data/A_loan.csv +19 -0
- teradataml/data/BINARY_REALS_LEFT.csv +11 -0
- teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
- teradataml/data/B_loan.csv +49 -0
- teradataml/data/BuoyData2.csv +17 -0
- teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
- teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
- teradataml/data/Convolve2RealsLeft.csv +5 -0
- teradataml/data/Convolve2RealsRight.csv +5 -0
- teradataml/data/Convolve2ValidLeft.csv +11 -0
- teradataml/data/Convolve2ValidRight.csv +11 -0
- teradataml/data/DFFTConv_Real_8_8.csv +65 -0
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/Mall_customer_data.csv +201 -0
- teradataml/data/Orders1_12mf.csv +25 -0
- teradataml/data/Pi_loan.csv +7 -0
- teradataml/data/SMOOTHED_DATA.csv +7 -0
- teradataml/data/TestDFFT8.csv +9 -0
- teradataml/data/TestRiver.csv +109 -0
- teradataml/data/Traindata.csv +28 -0
- teradataml/data/__init__.py +0 -0
- teradataml/data/acf.csv +17 -0
- teradataml/data/adaboost_example.json +34 -0
- teradataml/data/adaboostpredict_example.json +24 -0
- teradataml/data/additional_table.csv +11 -0
- teradataml/data/admissions_test.csv +21 -0
- teradataml/data/admissions_train.csv +41 -0
- teradataml/data/admissions_train_nulls.csv +41 -0
- teradataml/data/advertising.csv +201 -0
- teradataml/data/ageandheight.csv +13 -0
- teradataml/data/ageandpressure.csv +31 -0
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/antiselect_example.json +36 -0
- teradataml/data/antiselect_input.csv +8 -0
- teradataml/data/antiselect_input_mixed_case.csv +8 -0
- teradataml/data/applicant_external.csv +7 -0
- teradataml/data/applicant_reference.csv +7 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/arima_example.json +9 -0
- teradataml/data/assortedtext_input.csv +8 -0
- teradataml/data/attribution_example.json +34 -0
- teradataml/data/attribution_sample_table.csv +27 -0
- teradataml/data/attribution_sample_table1.csv +6 -0
- teradataml/data/attribution_sample_table2.csv +11 -0
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bank_web_clicks1.csv +43 -0
- teradataml/data/bank_web_clicks2.csv +91 -0
- teradataml/data/bank_web_url.csv +85 -0
- teradataml/data/barrier.csv +2 -0
- teradataml/data/barrier_new.csv +3 -0
- teradataml/data/betweenness_example.json +14 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/bin_breaks.csv +8 -0
- teradataml/data/bin_fit_ip.csv +4 -0
- teradataml/data/binary_complex_left.csv +11 -0
- teradataml/data/binary_complex_right.csv +11 -0
- teradataml/data/binary_matrix_complex_left.csv +21 -0
- teradataml/data/binary_matrix_complex_right.csv +21 -0
- teradataml/data/binary_matrix_real_left.csv +21 -0
- teradataml/data/binary_matrix_real_right.csv +21 -0
- teradataml/data/blood2ageandweight.csv +26 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/boston.csv +507 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/buoydata_mix.csv +11 -0
- teradataml/data/burst_data.csv +5 -0
- teradataml/data/burst_example.json +21 -0
- teradataml/data/byom_example.json +34 -0
- teradataml/data/bytes_table.csv +4 -0
- teradataml/data/cal_housing_ex_raw.csv +70 -0
- teradataml/data/callers.csv +7 -0
- teradataml/data/calls.csv +10 -0
- teradataml/data/cars_hist.csv +33 -0
- teradataml/data/cat_table.csv +25 -0
- teradataml/data/ccm_example.json +32 -0
- teradataml/data/ccm_input.csv +91 -0
- teradataml/data/ccm_input2.csv +13 -0
- teradataml/data/ccmexample.csv +101 -0
- teradataml/data/ccmprepare_example.json +9 -0
- teradataml/data/ccmprepare_input.csv +91 -0
- teradataml/data/cfilter_example.json +12 -0
- teradataml/data/changepointdetection_example.json +18 -0
- teradataml/data/changepointdetectionrt_example.json +8 -0
- teradataml/data/chi_sq.csv +3 -0
- teradataml/data/churn_data.csv +14 -0
- teradataml/data/churn_emission.csv +35 -0
- teradataml/data/churn_initial.csv +3 -0
- teradataml/data/churn_state_transition.csv +5 -0
- teradataml/data/citedges_2.csv +745 -0
- teradataml/data/citvertices_2.csv +1210 -0
- teradataml/data/clicks2.csv +16 -0
- teradataml/data/clickstream.csv +13 -0
- teradataml/data/clickstream1.csv +11 -0
- teradataml/data/closeness_example.json +16 -0
- teradataml/data/complaints.csv +21 -0
- teradataml/data/complaints_mini.csv +3 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_testtoken.csv +224 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/complaints_tokens_test.csv +353 -0
- teradataml/data/complaints_traintoken.csv +472 -0
- teradataml/data/computers_category.csv +1001 -0
- teradataml/data/computers_test1.csv +1252 -0
- teradataml/data/computers_train1.csv +5009 -0
- teradataml/data/computers_train1_clustered.csv +5009 -0
- teradataml/data/confusionmatrix_example.json +9 -0
- teradataml/data/conversion_event_table.csv +3 -0
- teradataml/data/corr_input.csv +17 -0
- teradataml/data/correlation_example.json +11 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/coxhazardratio_example.json +39 -0
- teradataml/data/coxph_example.json +15 -0
- teradataml/data/coxsurvival_example.json +28 -0
- teradataml/data/cpt.csv +41 -0
- teradataml/data/credit_ex_merged.csv +45 -0
- teradataml/data/creditcard_data.csv +1001 -0
- teradataml/data/customer_loyalty.csv +301 -0
- teradataml/data/customer_loyalty_newseq.csv +31 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +173 -0
- teradataml/data/decisionforest_example.json +37 -0
- teradataml/data/decisionforestpredict_example.json +38 -0
- teradataml/data/decisiontree_example.json +21 -0
- teradataml/data/decisiontreepredict_example.json +45 -0
- teradataml/data/dfft2_size4_real.csv +17 -0
- teradataml/data/dfft2_test_matrix16.csv +17 -0
- teradataml/data/dfft2conv_real_4_4.csv +65 -0
- teradataml/data/diabetes.csv +443 -0
- teradataml/data/diabetes_test.csv +89 -0
- teradataml/data/dict_table.csv +5 -0
- teradataml/data/docperterm_table.csv +4 -0
- teradataml/data/docs/__init__.py +1 -0
- teradataml/data/docs/byom/__init__.py +0 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
- teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
- teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
- teradataml/data/docs/byom/docs/__init__.py +0 -0
- teradataml/data/docs/sqle/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
- teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
- teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
- teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
- teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
- teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
- teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
- teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
- teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
- teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
- teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
- teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
- teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
- teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
- teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
- teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
- teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
- teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
- teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
- teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
- teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
- teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
- teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/tableoperator/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
- teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
- teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
- teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/uaf/__init__.py +0 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
- teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
- teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
- teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
- teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
- teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
- teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
- teradataml/data/dtw_example.json +18 -0
- teradataml/data/dtw_t1.csv +11 -0
- teradataml/data/dtw_t2.csv +4 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt2d_example.json +16 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_example.json +15 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/dwt_filter_dim.csv +5 -0
- teradataml/data/emission.csv +9 -0
- teradataml/data/emp_table_by_dept.csv +19 -0
- teradataml/data/employee_info.csv +4 -0
- teradataml/data/employee_table.csv +6 -0
- teradataml/data/excluding_event_table.csv +2 -0
- teradataml/data/finance_data.csv +6 -0
- teradataml/data/finance_data2.csv +61 -0
- teradataml/data/finance_data3.csv +93 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/fish.csv +160 -0
- teradataml/data/fm_blood2ageandweight.csv +26 -0
- teradataml/data/fmeasure_example.json +12 -0
- teradataml/data/followers_leaders.csv +10 -0
- teradataml/data/fpgrowth_example.json +12 -0
- teradataml/data/frequentpaths_example.json +29 -0
- teradataml/data/friends.csv +9 -0
- teradataml/data/fs_input.csv +33 -0
- teradataml/data/fs_input1.csv +33 -0
- teradataml/data/genData.csv +513 -0
- teradataml/data/geodataframe_example.json +40 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/glm_admissions_model.csv +12 -0
- teradataml/data/glm_example.json +56 -0
- teradataml/data/glml1l2_example.json +28 -0
- teradataml/data/glml1l2predict_example.json +54 -0
- teradataml/data/glmpredict_example.json +54 -0
- teradataml/data/gq_t1.csv +21 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/hconvolve_complex_right.csv +5 -0
- teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
- teradataml/data/histogram_example.json +12 -0
- teradataml/data/hmmdecoder_example.json +79 -0
- teradataml/data/hmmevaluator_example.json +25 -0
- teradataml/data/hmmsupervised_example.json +10 -0
- teradataml/data/hmmunsupervised_example.json +8 -0
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/house_values.csv +12 -0
- teradataml/data/house_values2.csv +13 -0
- teradataml/data/housing_cat.csv +7 -0
- teradataml/data/housing_data.csv +9 -0
- teradataml/data/housing_test.csv +47 -0
- teradataml/data/housing_test_binary.csv +47 -0
- teradataml/data/housing_train.csv +493 -0
- teradataml/data/housing_train_attribute.csv +5 -0
- teradataml/data/housing_train_binary.csv +437 -0
- teradataml/data/housing_train_parameter.csv +2 -0
- teradataml/data/housing_train_response.csv +493 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/ibm_stock.csv +370 -0
- teradataml/data/ibm_stock1.csv +370 -0
- teradataml/data/identitymatch_example.json +22 -0
- teradataml/data/idf_table.csv +4 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/impressions.csv +101 -0
- teradataml/data/inflation.csv +21 -0
- teradataml/data/initial.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/insect_sprays.csv +13 -0
- teradataml/data/insurance.csv +1339 -0
- teradataml/data/interpolator_example.json +13 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/iris_altinput.csv +481 -0
- teradataml/data/iris_attribute_output.csv +8 -0
- teradataml/data/iris_attribute_test.csv +121 -0
- teradataml/data/iris_attribute_train.csv +481 -0
- teradataml/data/iris_category_expect_predict.csv +31 -0
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/iris_input.csv +151 -0
- teradataml/data/iris_response_train.csv +121 -0
- teradataml/data/iris_test.csv +31 -0
- teradataml/data/iris_train.csv +121 -0
- teradataml/data/join_table1.csv +4 -0
- teradataml/data/join_table2.csv +4 -0
- teradataml/data/jsons/anly_function_name.json +7 -0
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/byom/dataikupredict.json +148 -0
- teradataml/data/jsons/byom/datarobotpredict.json +147 -0
- teradataml/data/jsons/byom/h2opredict.json +195 -0
- teradataml/data/jsons/byom/onnxembeddings.json +267 -0
- teradataml/data/jsons/byom/onnxpredict.json +187 -0
- teradataml/data/jsons/byom/pmmlpredict.json +147 -0
- teradataml/data/jsons/paired_functions.json +450 -0
- teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
- teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
- teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
- teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
- teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
- teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
- teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
- teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
- teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
- teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
- teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
- teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
- teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
- teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
- teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
- teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
- teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
- teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
- teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
- teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
- teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
- teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
- teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
- teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
- teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
- teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
- teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
- teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
- teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/kmeans_example.json +23 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/kmeans_us_arrests_data.csv +51 -0
- teradataml/data/knn_example.json +19 -0
- teradataml/data/knnrecommender_example.json +7 -0
- teradataml/data/knnrecommenderpredict_example.json +12 -0
- teradataml/data/lar_example.json +17 -0
- teradataml/data/larpredict_example.json +30 -0
- teradataml/data/lc_new_predictors.csv +5 -0
- teradataml/data/lc_new_reference.csv +9 -0
- teradataml/data/lda_example.json +9 -0
- teradataml/data/ldainference_example.json +15 -0
- teradataml/data/ldatopicsummary_example.json +9 -0
- teradataml/data/levendist_input.csv +13 -0
- teradataml/data/levenshteindistance_example.json +10 -0
- teradataml/data/linreg_example.json +10 -0
- teradataml/data/load_example_data.py +350 -0
- teradataml/data/loan_prediction.csv +295 -0
- teradataml/data/lungcancer.csv +138 -0
- teradataml/data/mappingdata.csv +12 -0
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/milk_timeseries.csv +157 -0
- teradataml/data/min_max_titanic.csv +4 -0
- teradataml/data/minhash_example.json +6 -0
- teradataml/data/ml_ratings.csv +7547 -0
- teradataml/data/ml_ratings_10.csv +2445 -0
- teradataml/data/mobile_data.csv +13 -0
- teradataml/data/model1_table.csv +5 -0
- teradataml/data/model2_table.csv +5 -0
- teradataml/data/models/License_file.txt +1 -0
- teradataml/data/models/License_file_empty.txt +0 -0
- teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
- teradataml/data/models/dr_iris_rf +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
- teradataml/data/models/iris_db_glm_model.pmml +57 -0
- teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
- teradataml/data/models/iris_kmeans_model +0 -0
- teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
- teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
- teradataml/data/modularity_example.json +12 -0
- teradataml/data/movavg_example.json +8 -0
- teradataml/data/mtx1.csv +7 -0
- teradataml/data/mtx2.csv +13 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/mvdfft8.csv +9 -0
- teradataml/data/naivebayes_example.json +10 -0
- teradataml/data/naivebayespredict_example.json +19 -0
- teradataml/data/naivebayestextclassifier2_example.json +7 -0
- teradataml/data/naivebayestextclassifier_example.json +8 -0
- teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
- teradataml/data/name_Find_configure.csv +10 -0
- teradataml/data/namedentityfinder_example.json +14 -0
- teradataml/data/namedentityfinderevaluator_example.json +10 -0
- teradataml/data/namedentityfindertrainer_example.json +6 -0
- teradataml/data/nb_iris_input_test.csv +31 -0
- teradataml/data/nb_iris_input_train.csv +121 -0
- teradataml/data/nbp_iris_model.csv +13 -0
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_extractor_text.csv +2 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/ner_sports_test2.csv +29 -0
- teradataml/data/ner_sports_train.csv +501 -0
- teradataml/data/nerevaluator_example.json +6 -0
- teradataml/data/nerextractor_example.json +18 -0
- teradataml/data/nermem_sports_test.csv +18 -0
- teradataml/data/nermem_sports_train.csv +51 -0
- teradataml/data/nertrainer_example.json +7 -0
- teradataml/data/ngrams_example.json +7 -0
- teradataml/data/notebooks/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
- teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
- teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
- teradataml/data/npath_example.json +23 -0
- teradataml/data/ntree_example.json +14 -0
- teradataml/data/numeric_strings.csv +5 -0
- teradataml/data/numerics.csv +4 -0
- teradataml/data/ocean_buoy.csv +17 -0
- teradataml/data/ocean_buoy2.csv +17 -0
- teradataml/data/ocean_buoys.csv +28 -0
- teradataml/data/ocean_buoys2.csv +10 -0
- teradataml/data/ocean_buoys_nonpti.csv +28 -0
- teradataml/data/ocean_buoys_seq.csv +29 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +92 -0
- teradataml/data/optional_event_table.csv +4 -0
- teradataml/data/orders1.csv +11 -0
- teradataml/data/orders1_12.csv +13 -0
- teradataml/data/orders_ex.csv +4 -0
- teradataml/data/pack_example.json +9 -0
- teradataml/data/package_tracking.csv +19 -0
- teradataml/data/package_tracking_pti.csv +19 -0
- teradataml/data/pagerank_example.json +13 -0
- teradataml/data/paragraphs_input.csv +6 -0
- teradataml/data/pathanalyzer_example.json +8 -0
- teradataml/data/pathgenerator_example.json +8 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/phrases.csv +7 -0
- teradataml/data/pivot_example.json +9 -0
- teradataml/data/pivot_input.csv +22 -0
- teradataml/data/playerRating.csv +31 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/postagger_example.json +7 -0
- teradataml/data/posttagger_output.csv +44 -0
- teradataml/data/production_data.csv +17 -0
- teradataml/data/production_data2.csv +7 -0
- teradataml/data/randomsample_example.json +32 -0
- teradataml/data/randomwalksample_example.json +9 -0
- teradataml/data/rank_table.csv +6 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/ref_mobile_data.csv +4 -0
- teradataml/data/ref_mobile_data_dense.csv +2 -0
- teradataml/data/ref_url.csv +17 -0
- teradataml/data/restaurant_reviews.csv +7 -0
- teradataml/data/retail_churn_table.csv +27772 -0
- teradataml/data/river_data.csv +145 -0
- teradataml/data/roc_example.json +8 -0
- teradataml/data/roc_input.csv +101 -0
- teradataml/data/rule_inputs.csv +6 -0
- teradataml/data/rule_table.csv +2 -0
- teradataml/data/sales.csv +7 -0
- teradataml/data/sales_transaction.csv +501 -0
- teradataml/data/salesdata.csv +342 -0
- teradataml/data/sample_cities.csv +3 -0
- teradataml/data/sample_shapes.csv +11 -0
- teradataml/data/sample_streets.csv +3 -0
- teradataml/data/sampling_example.json +16 -0
- teradataml/data/sax_example.json +17 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +74 -0
- teradataml/data/scale_housing.csv +11 -0
- teradataml/data/scale_housing_test.csv +6 -0
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scale_stat.csv +11 -0
- teradataml/data/scalebypartition_example.json +13 -0
- teradataml/data/scalemap_example.json +13 -0
- teradataml/data/scalesummary_example.json +12 -0
- teradataml/data/score_category.csv +101 -0
- teradataml/data/score_summary.csv +4 -0
- teradataml/data/script_example.json +10 -0
- teradataml/data/scripts/deploy_script.py +84 -0
- teradataml/data/scripts/lightgbm/dataset.template +175 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/mapper.py +16 -0
- teradataml/data/scripts/mapper_replace.py +16 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/seeds.csv +10 -0
- teradataml/data/sentenceextractor_example.json +7 -0
- teradataml/data/sentiment_extract_input.csv +11 -0
- teradataml/data/sentiment_train.csv +16 -0
- teradataml/data/sentiment_word.csv +20 -0
- teradataml/data/sentiment_word_input.csv +20 -0
- teradataml/data/sentimentextractor_example.json +24 -0
- teradataml/data/sentimenttrainer_example.json +8 -0
- teradataml/data/sequence_table.csv +10 -0
- teradataml/data/seriessplitter_example.json +8 -0
- teradataml/data/sessionize_example.json +17 -0
- teradataml/data/sessionize_table.csv +116 -0
- teradataml/data/setop_test1.csv +24 -0
- teradataml/data/setop_test2.csv +22 -0
- teradataml/data/soc_nw_edges.csv +11 -0
- teradataml/data/soc_nw_vertices.csv +8 -0
- teradataml/data/souvenir_timeseries.csv +168 -0
- teradataml/data/sparse_iris_attribute.csv +5 -0
- teradataml/data/sparse_iris_test.csv +121 -0
- teradataml/data/sparse_iris_train.csv +601 -0
- teradataml/data/star1.csv +6 -0
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/state_transition.csv +5 -0
- teradataml/data/stock_data.csv +53 -0
- teradataml/data/stock_movement.csv +11 -0
- teradataml/data/stock_vol.csv +76 -0
- teradataml/data/stop_words.csv +8 -0
- teradataml/data/store_sales.csv +37 -0
- teradataml/data/stringsimilarity_example.json +8 -0
- teradataml/data/strsimilarity_input.csv +13 -0
- teradataml/data/students.csv +101 -0
- teradataml/data/svm_iris_input_test.csv +121 -0
- teradataml/data/svm_iris_input_train.csv +481 -0
- teradataml/data/svm_iris_model.csv +7 -0
- teradataml/data/svmdense_example.json +10 -0
- teradataml/data/svmdensepredict_example.json +19 -0
- teradataml/data/svmsparse_example.json +8 -0
- teradataml/data/svmsparsepredict_example.json +14 -0
- teradataml/data/svmsparsesummary_example.json +8 -0
- teradataml/data/target_mobile_data.csv +13 -0
- teradataml/data/target_mobile_data_dense.csv +5 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/templatedata.csv +1201 -0
- teradataml/data/templates/open_source_ml.json +11 -0
- teradataml/data/teradata_icon.ico +0 -0
- teradataml/data/teradataml_example.json +1473 -0
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_loan_prediction.csv +53 -0
- teradataml/data/test_pacf_12.csv +37 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/test_river2.csv +109 -0
- teradataml/data/text_inputs.csv +6 -0
- teradataml/data/textchunker_example.json +8 -0
- teradataml/data/textclassifier_example.json +7 -0
- teradataml/data/textclassifier_input.csv +7 -0
- teradataml/data/textclassifiertrainer_example.json +7 -0
- teradataml/data/textmorph_example.json +11 -0
- teradataml/data/textparser_example.json +15 -0
- teradataml/data/texttagger_example.json +12 -0
- teradataml/data/texttokenizer_example.json +7 -0
- teradataml/data/texttrainer_input.csv +11 -0
- teradataml/data/tf_example.json +7 -0
- teradataml/data/tfidf_example.json +14 -0
- teradataml/data/tfidf_input1.csv +201 -0
- teradataml/data/tfidf_train.csv +6 -0
- teradataml/data/time_table1.csv +535 -0
- teradataml/data/time_table2.csv +14 -0
- teradataml/data/timeseriesdata.csv +1601 -0
- teradataml/data/timeseriesdatasetsd4.csv +105 -0
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic.csv +892 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/token_table.csv +696 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/train_tracking.csv +28 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/transformation_table.csv +6 -0
- teradataml/data/transformation_table_new.csv +2 -0
- teradataml/data/tv_spots.csv +16 -0
- teradataml/data/twod_climate_data.csv +117 -0
- teradataml/data/uaf_example.json +529 -0
- teradataml/data/univariatestatistics_example.json +9 -0
- teradataml/data/unpack_example.json +10 -0
- teradataml/data/unpivot_example.json +25 -0
- teradataml/data/unpivot_input.csv +8 -0
- teradataml/data/url_data.csv +10 -0
- teradataml/data/us_air_pass.csv +37 -0
- teradataml/data/us_population.csv +624 -0
- teradataml/data/us_states_shapes.csv +52 -0
- teradataml/data/varmax_example.json +18 -0
- teradataml/data/vectordistance_example.json +30 -0
- teradataml/data/ville_climatedata.csv +121 -0
- teradataml/data/ville_tempdata.csv +12 -0
- teradataml/data/ville_tempdata1.csv +12 -0
- teradataml/data/ville_temperature.csv +11 -0
- teradataml/data/waveletTable.csv +1605 -0
- teradataml/data/waveletTable2.csv +1605 -0
- teradataml/data/weightedmovavg_example.json +9 -0
- teradataml/data/wft_testing.csv +5 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/data/word_embed_input_table1.csv +6 -0
- teradataml/data/word_embed_input_table2.csv +5 -0
- teradataml/data/word_embed_model.csv +23 -0
- teradataml/data/words_input.csv +13 -0
- teradataml/data/xconvolve_complex_left.csv +6 -0
- teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
- teradataml/data/xgboost_example.json +36 -0
- teradataml/data/xgboostpredict_example.json +32 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/__init__.py +0 -0
- teradataml/dataframe/copy_to.py +2446 -0
- teradataml/dataframe/data_transfer.py +2840 -0
- teradataml/dataframe/dataframe.py +20908 -0
- teradataml/dataframe/dataframe_utils.py +2114 -0
- teradataml/dataframe/fastload.py +794 -0
- teradataml/dataframe/functions.py +2110 -0
- teradataml/dataframe/indexer.py +424 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +1171 -0
- teradataml/dataframe/sql.py +10904 -0
- teradataml/dataframe/sql_function_parameters.py +440 -0
- teradataml/dataframe/sql_functions.py +652 -0
- teradataml/dataframe/sql_interfaces.py +220 -0
- teradataml/dataframe/vantage_function_types.py +675 -0
- teradataml/dataframe/window.py +694 -0
- teradataml/dbutils/__init__.py +3 -0
- teradataml/dbutils/dbutils.py +2871 -0
- teradataml/dbutils/filemgr.py +318 -0
- teradataml/gen_ai/__init__.py +2 -0
- teradataml/gen_ai/convAI.py +473 -0
- teradataml/geospatial/__init__.py +4 -0
- teradataml/geospatial/geodataframe.py +1105 -0
- teradataml/geospatial/geodataframecolumn.py +392 -0
- teradataml/geospatial/geometry_types.py +926 -0
- teradataml/hyperparameter_tuner/__init__.py +1 -0
- teradataml/hyperparameter_tuner/optimizer.py +4115 -0
- teradataml/hyperparameter_tuner/utils.py +303 -0
- teradataml/lib/__init__.py +0 -0
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/lib/libaed_0_1_ppc64le.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/_base.py +1321 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/_constants.py +61 -0
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +267 -0
- teradataml/options/__init__.py +148 -0
- teradataml/options/configure.py +489 -0
- teradataml/options/display.py +187 -0
- teradataml/plot/__init__.py +3 -0
- teradataml/plot/axis.py +1427 -0
- teradataml/plot/constants.py +15 -0
- teradataml/plot/figure.py +431 -0
- teradataml/plot/plot.py +810 -0
- teradataml/plot/query_generator.py +83 -0
- teradataml/plot/subplot.py +216 -0
- teradataml/scriptmgmt/UserEnv.py +4273 -0
- teradataml/scriptmgmt/__init__.py +3 -0
- teradataml/scriptmgmt/lls_utils.py +2157 -0
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +900 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +409 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/series/__init__.py +0 -0
- teradataml/series/series.py +537 -0
- teradataml/series/series_utils.py +71 -0
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +658 -0
- teradataml/store/feature_store/feature_store.py +4814 -0
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +7330 -0
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/Apply.py +979 -0
- teradataml/table_operators/Script.py +1739 -0
- teradataml/table_operators/TableOperator.py +1343 -0
- teradataml/table_operators/__init__.py +2 -0
- teradataml/table_operators/apply_query_generator.py +262 -0
- teradataml/table_operators/query_generator.py +493 -0
- teradataml/table_operators/table_operator_query_generator.py +462 -0
- teradataml/table_operators/table_operator_util.py +726 -0
- teradataml/table_operators/templates/dataframe_apply.template +184 -0
- teradataml/table_operators/templates/dataframe_map.template +176 -0
- teradataml/table_operators/templates/dataframe_register.template +73 -0
- teradataml/table_operators/templates/dataframe_udf.template +67 -0
- teradataml/table_operators/templates/script_executor.template +170 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +53 -0
- teradataml/utils/__init__.py +0 -0
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +943 -0
- teradataml/utils/internal_buffer.py +122 -0
- teradataml/utils/print_versions.py +206 -0
- teradataml/utils/utils.py +451 -0
- teradataml/utils/validators.py +3305 -0
- teradataml-20.0.0.8.dist-info/METADATA +2804 -0
- teradataml-20.0.0.8.dist-info/RECORD +1208 -0
- teradataml-20.0.0.8.dist-info/WHEEL +5 -0
- teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
- teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,2114 @@
+# -*- coding: utf-8 -*-
+"""
+
+Unpublished work.
+Copyright (c) 2018 by Teradata Corporation. All rights reserved.
+TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
+
+Primary Owner: mark.sandan@teradata.com
+Secondary Owner:
+
+This file implements util functions of data frame.
+"""
+
+import numbers
+import re
+import pandas as pd
+import json
+from collections import OrderedDict
+
+from teradataml.common.utils import UtilFuncs
+from teradataml.common.aed_utils import AedUtils
+from teradataml.common.constants import AEDConstants, PTITableConstants, \
+    SQLPattern, PythonTypes, TeradataConstants, SQLConstants
+from teradataml.common.sqlbundle import SQLBundle
+from teradataml.common.exceptions import TeradataMlException
+from teradataml.common.messages import Messages
+from teradataml.common.messagecodes import MessageCodes
+
+from teradataml.context.context import get_context, get_connection
+from teradataml.context.context import _get_current_databasename
+from teradataml.dbutils.dbutils import _execute_query_and_generate_pandas_df
+
+from teradataml.options.display import display
+from teradataml.options.configure import configure
+from teradataml.utils.dtypes import _Dtypes, _DtypesMappers
+from teradataml.utils.utils import execute_sql
+
+from teradatasqlalchemy.types import FLOAT, NUMBER, DECIMAL, PERIOD_TIMESTAMP
+from teradatasqlalchemy.dialect import preparer, dialect as td_dialect
+import teradatasqlalchemy.types as tdsqlalchemy
+import teradataml.dataframe as tdmldf
+from teradataml.dataframe.sql_interfaces import ColumnExpression
+
+from sqlalchemy.sql import select
+from sqlalchemy.sql.expression import text
+from sqlalchemy import table, column, func
+from datetime import datetime, date, time
+from decimal import Decimal
+
+# TODO - Need to write unit testcases for these functions
+class DataFrameUtils():
+
+    @staticmethod
+    def _execute_node_return_db_object_name(nodeid, metaexpression=None):
+        """
+        Fetches queries and view names from AED node and creates views from queries
+        Additionally inspects the metaexpression for consistency
+
+        PARAMETERS:
+            nodeid: nodeid to execute
+            metaexpression: (optional) updated _metaexpr to validate
+
+        EXAMPLES:
+            _execute_node_return_db_object_name(nodeid)
+            _execute_node_return_db_object_name(nodeid, metaexpr)
+
+        RETURNS:
+            Top level view name.
+
+        """
+        aed_obj = AedUtils()
+        if not aed_obj._aed_is_node_executed(nodeid):
+
+            view_query_node_type_list = aed_obj._aed_get_exec_query(nodeid)
+            view_names, queries, node_query_types, node_ids = view_query_node_type_list
+
+            # Executing Nodes / Creating Views
+            for index in range(len(queries) - 1, -1, -1):
+                is_persist = False
+                if metaexpression and metaexpression._is_persist:
+                    is_persist = True
+
+                try:
+                    if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                        UtilFuncs._create_table(view_names[index], queries[index], volatile=True)
+
+                    elif node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_ML_QUERY_MULTI_OUTPUT.value or \
+                            ("OUT TABLE " in queries[index] and SQLPattern.SQLMR.value.match(queries[index])) or \
+                            is_persist:
+                        # TODO:: OR condition in above needs to be removed once AED support is added.
+                        UtilFuncs._create_table(view_names[index], queries[index])
+
+                    elif node_query_types[index] in ['groupby', 'groupbytime']:
+                        # If query_type is either groupby or groupbytime, get its parent
+                        # nodeid and execute queries for the same.
+                        parent_nodeid = aed_obj._aed_get_parent_nodeids(nodeid)[0]
+                        DataFrameUtils._execute_node_return_db_object_name(parent_nodeid)
+
+                    elif node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_REFERENCE.value:
+                        # Reference nodes - To be ignored.
+                        pass
+
+                    else:
+                        UtilFuncs._create_view(view_names[index], queries[index])
+
+                    # Updating Node Status for executed Node
+                    aed_obj._aed_update_node_state_single(node_ids[index], AEDConstants.AED_NODE_EXECUTED.value)
+
+                except Exception as emsg:
+                    # TODO:: Append node execution details to emsg.
+                    # Node description, such as nodeType or node operation, should be added
+                    # here in 'emsg' to give more information about where exactly
+                    # node execution failed.
+                    raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_EXEC_SQL_FAILED, str(emsg)),
+                                              MessageCodes.TDMLDF_EXEC_SQL_FAILED)
+
+        # Setting New Table name retrieved to TDML DF
+        result_table_view_name = aed_obj._aed_get_tablename(nodeid)
+        # validate the metaexpression
+        if configure._validate_metaexpression:
+            DataFrameUtils._validate_metaexpression(result_table_view_name, metaexpression)
+
+        return result_table_view_name
+
+    @staticmethod
+    def _validate_metaexpression(result_table_view_name, metaexpression):
+        """
+        Inspects the metaexpression for consistency with the underlying table/view
+
+        PARAMETERS:
+            result_table_view_name: a string representing the table/view name to check column metadata
+            metaexpression: the metaexpr of the DataFrame to compare against the result_table_view_name
+
+        EXAMPLES:
+            _validate_metaexpression('t1', df._metaexpr)
+            _validate_metaexpression(result_table_view_name, metaexpr)
+
+        RETURNS:
+            None
+            Raises a RuntimeError if mismatches are found
+
+        """
+        # metaexpression should have already been updated
+        if metaexpression is not None:
+
+            name = lambda x: x[0]
+            type_ = lambda x: x[1]
+
+            # compare sorted by name of column
+            df = sorted(UtilFuncs._describe_column(DataFrameUtils._get_metadata_from_table(result_table_view_name)), key=lambda x: x[0])
+            meta = sorted(metaexpression.c, key=lambda x: x.name)
+
+            # check length
+            if len(df) == len(meta):
+                for i in range(len(df)):
+
+                    # map Teradata type to python type
+                    meta_type = UtilFuncs._teradata_type_to_python_type(meta[i].type)
+
+                    # compare column names and types
+                    if meta[i].name != name(df[i]) or meta_type != type_(df[i]):
+                        err_msg = "[Mismatch when checking %s]\n\t[Table/View] %s %s\n\t[MetaExpression] %s %s (mapped from => %s)\n"
+                        raise RuntimeError(err_msg % (result_table_view_name,
+                                                      name(df[i]), type_(df[i]),
+                                                      meta[i].name, meta_type, meta[i].type))
+            else:
+                err_msg = "[Length mismatch when checking %s]\nSource Table/View has length %s but MetaExpression has length %s"
+                raise RuntimeError(err_msg % (result_table_view_name, len(df), len(meta)))
+
+    @staticmethod
+    def _get_dataframe_print_string(table_name, index_label, orderby=None, undropped_index=None):
+        """
+        Builds string output for teradataml DataFrame
+
+        PARAMETERS:
+            table_name - Name of the database table to read from.
+            index_label - String/List specifying column(s) to use as index.
+            orderby - Order expression to sort returned rows.
+
+        EXAMPLES:
+            _get_dataframe_print_string('table_name', None, None)
+
+        RETURNS:
+            String representation of a pandas DataFrame.
+
+        """
+        read_query = SQLBundle._build_top_n_print_query(table_name, display.max_rows, orderby)
+
+        if index_label is not None:
+            pandas_df = _execute_query_and_generate_pandas_df(read_query, index=index_label)
+        else:
+            pandas_df = _execute_query_and_generate_pandas_df(read_query)
+
+        return pandas_df.to_string()
+
+    @staticmethod
+    def _get_pprint_dtypes(column_names_and_types, null_count=False):
+        """
+        Returns a string containing the column names and types.
+        If null_count is True, the string will also contain
+        the number of non-null values for each column.
+
+        PARAMETERS:
+            column_names_and_types - List of column names and types.
+            null_count (optional) - Specifies whether each entry in
+                column_names_and_types also carries the non-null count for the column.
+
+        EXAMPLES:
+            >>> print(_get_pprint_dtypes(column_names_and_types))
+            accounts      str
+            Feb         float
+            Jan           int
+            Mar           int
+            Apr           int
+            datetime      str
+
+            >>> print(_get_pprint_dtypes(column_names_and_types, null_count=True))
+            accounts    3 non-null str
+            Feb         3 non-null float
+            Jan         3 non-null int
+            Mar         3 non-null int
+            Apr         3 non-null int
+            datetime    3 non-null str
+
+        RAISES:
+
+        """
+
+        col_names = [i[0] for i in column_names_and_types]
+        col_types = [i[1] for i in column_names_and_types]
+        max_col_names = len(max(col_names, key=len)) + 4
+        max_col_types = len(max(col_types, key=len))
+        dtypes_string = ""
+        if not null_count:
+            for colname, coltype in column_names_and_types:
+                dtypes_string += "{0: <{name_width}}{1: >{type_width}}\n".format(colname, coltype,
+                                                                                 name_width=max_col_names,
+                                                                                 type_width=max_col_types)
+        else:
+            null_count = [i[2] for i in column_names_and_types]
+            max_null_count = len(str(max(null_count, key=len)))
+            for colname, coltype, num_nulls in column_names_and_types:
+                dtypes_string += "{0: <{name_width}}{1: <{count_width}} non-null {2: <{type_width}}\n".format(colname,
+                                                                                                              num_nulls,
+                                                                                                              coltype,
+                                                                                                              name_width=max_col_names,
+                                                                                                              count_width=max_null_count,
+                                                                                                              type_width=max_col_types)
+        # Remove last new line character.
+        dtypes_string = dtypes_string[:-1]
+        return dtypes_string
+
+    @staticmethod
+    def _get_metadata_from_table(table_name):
+        """
+        Retrieves column metadata by executing a HELP COLUMN command.
+
+        PARAMETERS:
+            table_name - The table name or view name.
+
+        RETURNS:
+            Returns the result set (column information) from HELP COLUMN.
+
+        RAISES:
+            Database error if an error occurred while executing the HELP COLUMN.
+
+        EXAMPLES:
+            df = DataFrame.from_table('mytab')
+            metadata = _get_metadata_from_table(df._table_name)
+        """
+        # Construct HELP COLUMN command.
+        help_col_sql = SQLBundle._build_help_column(table_name)
+        # Execute HELP COLUMN command.
+        return UtilFuncs._execute_query(help_col_sql)
+
+    @staticmethod
+    def _extract_select_string(select_expression):
+        """
+        Takes in a string/list representing a Pandas selection clause of any of the forms (only):
+            a) "col1" or 'col1'
+            b) ["col 1"] or ['col 1']
+            c) ["col1", "col2", "col3"] or ['col1', 'col2', 'col3']
+            d) [['col1', 'col2', 'col3']] or [["col1", "col2", "col3"]]
+
+        And returns a list with column strings representing the selection of the form:
+            a) ['col1']
+            b) ['col 1']
+            c) ['col1','col2','col3']
+            d) ['col1','col2','col3']
+
+        Column Names ("col1", "col2", ...) are Strings representing database table Columns.
+        All standard Teradata data types are supported for columns, e.g. INTEGER, VARCHAR(5), FLOAT.
+
+        PARAMETERS:
+            select_expression - Expression representing column selection.
+                Type - String or List of Strings or List of List (Single level only)
+                Required - Yes
+
+        EXAMPLES:
+            DataFrameUtils._extract_select_string([['col1', 'col2']])
+            DataFrameUtils._extract_select_string("col1")
+            DataFrameUtils._extract_select_string(["col1"])
+            DataFrameUtils._extract_select_string(["col1", "col2", "col3"])
+
+        RETURNS:
+            List of Strings representing column names.
+
+        RAISES:
+            TeradataMlException
+        """
+        tdp = preparer(td_dialect)
+        column_list = []
+
+        # Single String column
+        if isinstance(select_expression, str):
+            # Error handling - Empty String
+            if select_expression == "":
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
+                                          MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
+            else:
+                column_list.append(tdp.quote("{0}".format(select_expression.strip())))
+
+        # Error Handling - [], [""], [None], ["None"], ['col1', None], ['col1', '']
+        elif isinstance(select_expression, list) and (len(select_expression) == 0 or
+                any(element in [None, "None", ""] for element in select_expression)):
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
+                                      MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
+
+        # List - ["col1"] or ["col1", "col2", "col3"]
+        elif isinstance(select_expression, list) and all(isinstance(element, str) for element in select_expression):
+            if len(select_expression) == 1:
+                column_list.append(tdp.quote("{0}".format(select_expression[0].strip())))
+            else:
+                column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression]
+
+        # List of List (Single level only - Pandas Syntax) - [["col1", "col2", "col3"]]
+        elif isinstance(select_expression, list) and isinstance(select_expression[0], list):
+            # Error Handling - [[]], [[""]], [[None]], [['col1', None]], [['col1', "None"]], [['col1', '']]
+            if len(select_expression[0]) == 0 or any(element in [None, "None", ""] for element in select_expression[0]):
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY),
+                                          MessageCodes.TDMLDF_SELECT_NONE_OR_EMPTY)
+
+            else:
+                column_list = [tdp.quote("{0}".format(element.strip())) for element in select_expression[0]]
+
+        # Any other Format - Raise Format Exception
+        else:
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_FORMAT),
+                                      MessageCodes.TDMLDF_SELECT_INVALID_FORMAT)
+        return column_list
+
+    @staticmethod
+    def _get_primary_index_from_table(table_name):
+        """
+        Retrieves the primary index by executing a HELP INDEX command.
+        PARAMETERS:
+            table_name - The table name or volatile table name.
+        RETURNS:
+            Returns a list containing the primary index columns from HELP INDEX.
+            If there is no primary index (NoPI table), then returns None.
+        RAISES:
+            Database error if an error occurred while executing the HELP INDEX.
+        EXAMPLES:
+            df = DataFrame.from_table('mytab')
+            index_labels = df._get_primary_index_from_table(df._table_name)
+        """
+        # Construct HELP INDEX command.
+        help_index_sql = SQLBundle._build_help_index(table_name)
+
+        # Execute HELP INDEX command.
+        rows = UtilFuncs._execute_query(help_index_sql)
+        index_labels = []
+        for row in rows:
+            # row[1] specifies whether the Index is 'Primary or Secondary?'
+            if row[1].rstrip() == 'P':
+                # row[2] specifies a string of comma separated column names that form the primary index
+                if "," in row[2]:
+                    index_cols = row[2].split(',')
+                else:
+                    index_cols = [row[2]]
+                for index_col in index_cols:
+                    # Since TD_TIMEBUCKET column in PTI tables is not functionally available, it can be ignored
+                    # from the index information as well (else a warning is generated by SQLAlchemy).
+                    # row[12] corresponds to 'Timebucket' column in the results of 'help index' SQL command, which
+                    # is available only when the version supports PTI tables.
+                    if index_col == PTITableConstants.TD_TIMEBUCKET.value and len(row) > 12 and row[12] is not None:
+                        continue
+                    else:
+                        index_labels.append(index_col)
+
+        if len(index_labels) > 0:
+            return index_labels
+        else:
+            return None
+
+    @staticmethod
+    def __validate_sort_type_raise_exception(sort_col_type):
+        """
+        Function to raise a TeradataMlException when an invalid/incorrect
+        "sort_col_type" is encountered in the "_validate_sort_col_type" function.
+
+        PARAMETERS:
+            sort_col_type: The sort column type.
+
+        RETURNS:
+            None
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            df_utils.__validate_sort_type_raise_exception(PythonTypes.PY_STRING_TYPE.value)
+        """
+        msg = Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE).format(sort_col_type)
+        raise TeradataMlException(msg, MessageCodes.TDMLDF_DROP_INVALID_INDEX_TYPE)
+
+    @staticmethod
+    def _validate_sort_col_type(sort_col_type, sort_col_values):
+        """
+        Validates a list of sort column values with the sort column type.
+
+        PARAMETERS:
+            sort_col_type - The sort column type.
+            sort_col_values - A single value or list-like values.
+
+        RETURNS:
+            None
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, ["Jan", "Feb"])
+            df_utils._validate_sort_col_type(PythonTypes.PY_STRING_TYPE.value, "Jan")
+            df_utils._validate_sort_col_type(PythonTypes.PY_INT_TYPE.value, [1, 2])
+        """
+        if isinstance(sort_col_values, list):
+            if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
+                if not all(isinstance(i, str) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
+                if not all(isinstance(i, float) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
+                if not all(isinstance(i, Decimal) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
+                if not all(isinstance(i, datetime) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
+                if not all(isinstance(i, time) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
+                if not all(isinstance(i, date) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
+                if not all(isinstance(i, bytes) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            else:  # numeric type
+                if not all(isinstance(i, numbers.Integral) for i in sort_col_values):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+        elif isinstance(sort_col_values, (tuple, dict)):
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                                      MessageCodes.TDMLDF_DROP_ARGS)
+        else:
+            if sort_col_type == PythonTypes.PY_STRING_TYPE.value:
+                if not isinstance(sort_col_values, str):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_FLOAT_TYPE.value:
+                if not isinstance(sort_col_values, float):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_DECIMAL_TYPE.value:
+                if not isinstance(sort_col_values, Decimal):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_DATETIME_TYPE.value:
+                if not isinstance(sort_col_values, datetime):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_TIME_TYPE.value:
+                if not isinstance(sort_col_values, time):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_DATE_TYPE.value:
+                if not isinstance(sort_col_values, date):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            elif sort_col_type == PythonTypes.PY_BYTES_TYPE.value:
+                if not isinstance(sort_col_values, bytes):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+            else:  # numeric type
+                if not isinstance(sort_col_values, numbers.Integral):
+                    DataFrameUtils.__validate_sort_type_raise_exception(sort_col_type)
+
+    @staticmethod
+    def _get_required_columns_types_from_metaexpr(metaexpr, col_list=None):
+        """
+        Retrieves column names and types from a meta expression. To get types for only
+        some columns, pass those columns in the 'col_list' argument.
+
+        PARAMETERS:
+            metaexpr - Meta expression from which columns and types are to be retrieved.
+            col_list - Columns for which types are to be retrieved.
+
+        RETURNS:
+            Dictionary with column name as key and datatype as value.
+
+        EXAMPLES:
+            df = DataFrame.from_table('mytab')
+            metadata = _get_required_columns_types_from_metaexpr(df._metaexpr)
+        """
+
+        if isinstance(col_list, str):
+            col_list = [col_list]
+
+        if col_list is not None and not isinstance(col_list, list):
+            return None
+
+        meta_cols = metaexpr.t.c
+        meta_columns = [c.name for c in meta_cols]
+        col_names = []
+        col_types = []
+
+        # When column list to retrieve is not provided, return meta-data for all columns.
+        if col_list is None:
+            for col_name in meta_columns:
+                col_names.append(meta_cols[col_name].name)
+                col_types.append(meta_cols[col_name].type)
+
+        # Return meta-data for only requested columns otherwise.
+        else:
+            for col_name in col_list:
+                if DataFrameUtils._check_column_exists(col_name, meta_columns):
+                    # _metaexpr saves columns without quotes, so unquoting.
+                    unquoted_col_name = col_name.replace('"', "")
+                    col_names.append(meta_cols[unquoted_col_name].name)
+                    col_types.append(meta_cols[unquoted_col_name].type)
+
+        return OrderedDict(zip(col_names, col_types))
+
+    @staticmethod
+    def _check_column_exists(column_name, df_columns):
+        """
+        Checks whether the provided column is present in the list of columns or not.
+        Note:
+            It is the calling function's responsibility to send the column and columns list in proper case.
+            By default the lookup is case-sensitive. To make it case-insensitive,
+            send the column_name and df_columns list in lower case.
+
+        PARAMETERS:
+            column_name - Column name which needs to be checked.
+            df_columns - List of columns in which the column is to be checked.
+
+        RETURNS:
+            True if the column exists, otherwise False.
+
+        EXAMPLES:
+            df = DataFrame.from_table('mytab')
+            metadata = _check_column_exists("col1", df.columns)
+        """
+        unquoted_df_columns = [column.replace('"', "") for column in df_columns]
+        if column_name.replace('"', "") in unquoted_df_columns:
+            return True
+        else:
+            return False
+
+    @staticmethod
+    def _validate_agg_function(func, col_names):
+        """
+        Internal function to validate column names against actual
+        column names passed as parameter and aggregate operations
+        against valid aggregate operations.
+
+        PARAMETERS:
+            func - (Required) Specifies the function(s) to be
+                applied on teradataml DataFrame columns.
+                Acceptable formats for function(s) are string,
+                dictionary or list of strings/functions.
+                Accepted combinations are:
+                    1. String function name
+                    2. List of string functions
+                    3. Dictionary of column names -> string function
+                       (or list of string functions)
+            col_names - List. Names of the columns in the DataFrame.
+
+        RETURNS:
+            operations - dict of columns -> aggregate operations
+                Unified dictionary, similar to func, even for string and
+                list of strings or functions.
+
+        RAISES:
+            1. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
+               operation(s) received in parameter 'func' is/are
+               invalid.
+
+               Possible Value:
+               Invalid aggregate operation(s): minimum, counter.
+               Valid aggregate operation(s): count, max, mean, min,
+               std, sum.
+
+            2. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
+               specified in 'func' is not present in the dataframe.
+
+               Possible Value:
+               Invalid column(s) given in parameter func: col1.
+               Valid column(s): A, B, C, D.
+
+        EXAMPLES:
+            Let the dataframe contain 2 columns, col1 and col2.
+
+            VALID EXAMPLES:
+                1. operations = DataFrameUtils._validate_agg_function(
+                       'mean', ['col1', 'col2'])
+
+                2. operations = DataFrameUtils._validate_agg_function(
+                       ['mean', 'min'], ['col1', 'col2'])
+
+                3. operations = DataFrameUtils._validate_agg_function(
+                       {'col1': ['mean', 'min'], 'col2': 'count'},
+                       ['col1', 'col2'])
+
+            INVALID EXAMPLES:
+                1. operations = DataFrameUtils._validate_agg_function(
+                       'counter', ['col1', 'col2'])
+
+                2. operations = DataFrameUtils._validate_agg_function(
+                       {'col1': ['mean', 'min'], 'col55': 'count'},
+                       ['col1', 'col2'])
+        """
+        operations = OrderedDict()
+
+        valid_aggregate_operations = UtilFuncs._get_valid_aggregate_operations()
+
+        if isinstance(func, str):
+            for column in col_names:
+                operations[column] = [func]
+        elif isinstance(func, list):
+            for column in col_names:
+                operations[column] = func
+        else:
+            for column in func:
+                if isinstance(func[column], str):
+                    func[column] = [func[column]]  # Converts string inside dict to list
+            operations = func
+
+        given_columns = operations.keys()
+        invalid_columns = []
+        all_operations = []
+        for col in given_columns:
+            all_operations.extend(operations[col])
+            if col not in col_names:
+                invalid_columns.append(col)
+        if len(invalid_columns) > 0:  # If any of the columns specified is not present in dataframe
+            col_names.sort()
+            invalid_columns.sort()
+            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN). \
+                format(", ".join(invalid_columns), 'func', ", ".join(col_names))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN)
+
+        all_operations = list(set(all_operations))
+        invalid_aggregates = []
+        for operation in all_operations:
+            if operation not in valid_aggregate_operations and not operation.startswith('percentile_') \
+                    and operation not in UtilFuncs._get_valid_time_series_aggregate_operations():
+                invalid_aggregates.append(operation)
+        if len(invalid_aggregates) > 0:  # If any of the aggregate operations specified is not valid
+            # To raise the error message, let's add other time series aggregate operations that can be
+            # used with the DataFrame.agg() method.
+            valid_aggregate_operations = valid_aggregate_operations + ['first', 'last', 'mode']
+            valid_aggregate_operations.sort()
+            invalid_aggregates.sort()
+            msg = Messages.get_message(MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION). \
+                format(", ".join(invalid_aggregates), ", ".join(valid_aggregate_operations))
+            raise TeradataMlException(msg, MessageCodes.TDMLDF_INVALID_AGGREGATE_OPERATION)
+
+        return operations
+
+    @staticmethod
+    def _generate_aggregate_column_expression(df, column, operation, describe_op, tdp, **kwargs):
+        """
+        Function generates the aggregate column expression for the provided column
+        and aggregate function.
+
+        PARAMETERS:
+            df:
+                Required Argument.
+                Specifies the teradataml DataFrame which is to be used to get the
+                desired aggregate column expression.
+                Types: teradataml DataFrame
+
+            column:
+                Required Argument.
+                Specifies the column name for which the desired aggregate operation is
+                to be used.
+                Types: str
+
+            operation:
+                Required Argument.
+                Specifies the aggregate operation.
+                Types: str
+
+            describe_op:
+                Required Argument.
+                Specifies a boolean flag that decides whether the aggregate
+                operation is being performed for DataFrame.describe() or not.
+                Types: bool
+
+            tdp:
+                Required Argument.
+                Specifies a TeradataIdentifierPreparer object. It is required for
+                quoting.
+                Types: TeradataIdentifierPreparer
+
+            kwargs:
+                Specifies miscellaneous keyword arguments that can be passed to
+                aggregate functions.
+
+        RAISES:
+            AttributeError - In case ColumnExpression does not have the desired aggregate
+                function implemented.
+
+        RETURNS:
+            A boolean stating whether the column is supported or not, new column name,
+            new column type, a string representing the column aggregate expression, and
+            invalid column information in case the column has an unsupported type for an
+            aggregate operation.
+
+        EXAMPLES:
+            column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
+                DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
+                                                                     describe_op=describe_op, percentile=percentile,
+                                                                     tdp=tdp, **kwargs)
+        """
+        try:
+            key_to_process = ""
+            # quote column names same as that of the Teradata reserved keywords.
+            if "sort_columns" in kwargs:
+                key_to_process = "sort_columns"
+            elif "sort_column" in kwargs:
+                key_to_process = "sort_column"
+
+            if key_to_process:
+                quoted_columns = UtilFuncs._process_for_teradata_keyword(kwargs[key_to_process])
+                kwargs[key_to_process] = quoted_columns
+
+            if operation.startswith('percentile_'):
+                try:
+                    _operation_value = operation.split('_')
+                    _floatvalue = float(_operation_value[1])
+                    if _floatvalue < 0.0 or _floatvalue > 1.0 or len(_operation_value) > 2:
+                        raise ValueError
+                except ValueError:
+                    mssg = "Invalid aggregate operation '{}' requested on TeradataML DataFrame." \
+                           " Valid operation should be in format 'percentile_<floatvalue>' and <floatvalue> " \
+                           "should be in range [0.0, 1.0].".format(operation)
+                    raise ValueError(mssg) from None
+                func_expression = getattr(df[column], 'percentile')(percentile=_floatvalue)
+            else:
+                func_expression = getattr(df[column], operation)(describe_op=describe_op, **kwargs)
+            new_column_name = column if describe_op else "{1}_{0}".format(column, operation)
+            # column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str
+            return True, new_column_name, NUMBER() if describe_op else func_expression.type, \
+                func_expression.compile_label(new_column_name), None
+        except AttributeError:
+            # Being here means the provided operation is invalid and is not supported.
+            # This is for internal purposes only.
+            # Validation of operations for "agg" should be done in "agg" only.
+            raise RuntimeError("Invalid aggregate function: {}".format(operation))
+        except RuntimeError:
+            # Being here means the column does not support the provided operation.
+            # We will ignore this and add the column to the invalid column list.
+            # invalid_columns[operation].append("({0} - {1})".format(column, column_type)) OR
+            # We will raise a generic message, mentioning the DF does not have any column with a type
+            # supported to perform an operation.
+            if describe_op:
+                return True, tdp.quote(column), NUMBER(), 'null as {}'.format(tdp.quote(column)), None
+            else:
+                return False, None, None, None, "({0} - {1})".format(column, df[column].type)
+        except Exception:
+            raise
+
776
|
+
@staticmethod
|
|
777
|
+
def _construct_sql_expression_for_aggregations(df, column_names, column_types, func, percentile=.5,
|
|
778
|
+
describe_op=False, **kwargs):
|
|
779
|
+
"""
|
|
780
|
+
Internal function to create and return the sql expression
|
|
781
|
+
corresponding to given operation, given column_names and
|
|
782
|
+
column_types.
|
|
783
|
+
|
|
784
|
+
Column_types are used to check whether all the datatypes are
|
|
785
|
+
valid types for given operation and throw exception if they
|
|
786
|
+
are not.
|
|
787
|
+
|
|
788
|
+
PARAMETERS :
|
|
789
|
+
df:
|
|
790
|
+
Required Argument.
|
|
791
|
+
Specifies teradataml DataFrame which is to be used to get the desired
|
|
792
|
+
aggregate column expression.
|
|
793
|
+
Types: teradataml DataFrame
|
|
794
|
+
|
|
795
|
+
column_names:
|
|
796
|
+
Required Argument.
|
|
797
|
+
Specifies the column names for which desired aggregate operation is
|
|
798
|
+
to be executed.
|
|
799
|
+
Types: List of strings
|
|
800
|
+
|
|
801
|
+
column_types:
|
|
802
|
+
Required Argument.
|
|
803
|
+
Specifies the respective column types for column names.
|
|
804
|
+
Types: List of teradatasqlalchemy types
|
|
805
|
+
|
|
806
|
+
func:
|
|
807
|
+
Required Argument.
|
|
808
|
+
Specifies the aggregate function(s) to be applied on teradataml
|
|
809
|
+
DataFrame columns.
|
|
810
|
+
Types: string, dictionary or list of strings/functions.
|
|
811
|
+
Accepted combinations are:
|
|
812
|
+
1. String function name
|
|
813
|
+
2. List of functions
|
|
814
|
+
3. Dictionary containing column name as key and aggregate
|
|
815
|
+
function name (string or list of strings) as value
|
|
816
|
+
4. ColumnExpression built using the aggregate functions.
|
|
817
|
+
5. List of ColumnExpression built using the aggregate functions.
|
|
818
|
+
|
|
819
|
+
percentile:
|
|
820
|
+
Optional Argument.
|
|
821
|
+
Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
|
|
822
|
+
The default is .5, which returns the 50th percentiles.
|
|
823
|
+
Types: float
|
|
824
|
+
|
|
825
|
+
describe_op:
|
|
826
|
+
Optional Argument.
|
|
827
|
+
Specifies a boolean flag, that will decide whether the aggregate operation being
|
|
828
|
+
performed is for DataFrame.describe() or not.
|
|
829
|
+
Types: bool
|
|
830
|
+
|
|
831
|
+
kwargs:
|
|
832
|
+
Specifies miscellaneous keyword arguments that can be passed to aggregate functions.
|
|
833
|
+
|
|
834
|
+
RETURNS :
|
|
835
|
+
a)sql expression as
|
|
836
|
+
1. 'min(col1) as min_col1, min(col2) as min_col2' if
|
|
837
|
+
col1 and col2 are the columns in Dataframe and
|
|
838
|
+
operation is 'min'
|
|
839
|
+
2. 'max(col1) as max_col1, max(col2) as max_col2' if
|
|
840
|
+
col1 and col2 are the columns in Dataframe and
|
|
841
|
+
operation is 'max'
|
|
842
|
+
3. 'min(col1) as min_col1, stddev_samp(col2) as
|
|
843
|
+
std_col2' if col1, col2 are the columns in
|
|
844
|
+
Dataframe and operations are min, std.
|
|
845
|
+
etc...
|
|
846
|
+
b) new columns' names (eg min_col1, min_col2 ...)
|
|
847
|
+
c) new columns' types
|
|
848
|
+
RAISES:
|
|
849
|
+
TeradataMLException
|
|
850
|
+
1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
|
|
851
|
+
aggregate operations do not support specified columns.
|
|
852
|
+
|
|
853
|
+
Possible Value :
|
|
854
|
+
No results. Below is/are the error message(s):
|
|
855
|
+
All selected columns [(col1 - VARCHAR)] is/are
|
|
856
|
+
unsupported for 'sum' operation.
|
|
857
|
+
|
|
858
|
+
2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
|
|
859
|
+
operation(s) received in parameter 'func' is/are
|
|
860
|
+
invalid.
|
|
861
|
+
|
|
862
|
+
Possible Value :
|
|
863
|
+
Invalid aggregate operation(s): minimum, counter.
|
|
864
|
+
Valid aggregate operation(s): count, max, mean, min,
|
|
865
|
+
std, sum.
|
|
866
|
+
|
|
867
|
+
3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
|
|
868
|
+
specified in func is not present in the dataframe.
|
|
869
|
+
|
|
870
|
+
Possible Value :
|
|
871
|
+
Invalid column(s) given in parameter func: col1.
|
|
872
|
+
Valid column(s) : A, B, C, D.
|
|
873
|
+
|
|
874
|
+
EXAMPLES:
|
|
875
|
+
col_names, col_types = \
|
|
876
|
+
df_utils._get_column_names_and_types_from_metaexpr(
|
|
877
|
+
self._metaexpr)
|
|
878
|
+
expr, new_col_names, new_col_types = \
|
|
879
|
+
df_utils._construct_sql_expression_for_aggregations(
|
|
880
|
+
col_names, col_types, 'min')
|
|
881
|
+
|
|
882
|
+
expr1, new_col_names1, new_col_types1 = \
|
|
883
|
+
df_utils._construct_sql_expression_for_aggregations(
|
|
884
|
+
col_names, col_types, ['min', 'sum'])
|
|
885
|
+
|
|
886
|
+
expr2, new_col_names2, new_col_types2 = \
|
|
887
|
+
df_utils._construct_sql_expression_for_aggregations(
|
|
888
|
+
col_names, col_types, {'col1 : ['min', 'sum'],
|
|
889
|
+
'col2' : 'mean'})
|
|
890
|
+
|
|
891
|
+
"""
|
|
892
|
+
|
|
893
|
+
# eg of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
|
|
894
|
+
# FLOAT(precision=0)]
|
|
895
|
+
|
|
896
|
+
# eg of types of each column are <class 'teradatasqlalchemy.types.VARCHAR'>,
|
|
897
|
+
# <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
|
|
898
|
+
# <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc..
|
|
899
|
+
|
|
900
|
+
# If function is of type time series aggregates, we process aggregation differently.
|
|
901
|
+
if not isinstance(func, str):
|
|
902
|
+
# If func is not instance of string, that means function call is
|
|
903
|
+
# from DataFrame.agg(). And is made to process multiple functions.
|
|
904
|
+
# We will process the same differently, as we need to map and serialize the
|
|
905
|
+
# column names and aggregate function operate on.
|
|
906
|
+
# If we have just function to be executed on complete DataFrame, then we don't need
|
|
907
|
+
# this extra processing. Also, if call is from DataFrame.agg(), time series aggregate check
|
|
908
|
+
# is not required. As special Time Series aggregate functions cannot be used in
|
|
909
|
+
# DataFrame.agg().
|
|
910
|
+
return DataFrameUtils._construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types,
|
|
911
|
+
func, percentile, describe_op,
|
|
912
|
+
**kwargs)
|
|
913
|
+
|
|
914
|
+
as_time_series_aggregate = False
|
|
915
|
+
if "as_time_series_aggregate" in kwargs.keys():
|
|
916
|
+
as_time_series_aggregate = kwargs["as_time_series_aggregate"]
|
|
917
|
+
|
|
918
|
+
if as_time_series_aggregate and func in ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top',
|
|
919
|
+
'top with ties']:
|
|
920
|
+
return DataFrameUtils._construct_sql_expression_for_time_series_aggregations(df, column_names, column_types,
|
|
921
|
+
func, **kwargs)
|
|
922
|
+
|
|
923
|
+
tdp = preparer(td_dialect)
|
|
924
|
+
|
|
925
|
+
# This variable is used to decide whether DataFrame has all columns unsupported
|
|
926
|
+
# for the provided operations.
|
|
927
|
+
all_unsupported_columns = True
|
|
928
|
+
valid_columns = []
|
|
929
|
+
invalid_columns = []
|
|
930
|
+
new_column_names = []
|
|
931
|
+
new_column_types = []
|
|
932
|
+
for column in column_names:
|
|
933
|
+
column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str = \
|
|
934
|
+
DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=func,
|
|
935
|
+
describe_op=describe_op, percentile=percentile,
|
|
936
|
+
tdp=tdp, **kwargs)
|
|
937
|
+
if column_supported:
|
|
938
|
+
all_unsupported_columns = False
|
|
939
|
+
new_column_names.append(new_column_name)
|
|
940
|
+
new_column_types.append(new_column_type)
|
|
941
|
+
valid_columns.append(column_aggr_expr)
|
|
942
|
+
else:
|
|
943
|
+
invalid_columns.append("({0} - {1})".format(column, df[column].type))
|
|
944
|
+
|
|
945
|
+
if all_unsupported_columns:
|
|
946
|
+
|
|
947
|
+
error_msgs = []
|
|
948
|
+
invalid_columns.sort() # Helps in catching the columns in lexicographic order
|
|
949
|
+
error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(", ".join(invalid_columns),
|
|
950
|
+
func)
|
|
951
|
+
error_msgs.append(error)
|
|
952
|
+
|
|
953
|
+
if len(valid_columns) == 0: # No supported columns in the given list of columns
|
|
954
|
+
raise TeradataMlException(Messages.get_message(
|
|
955
|
+
MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
|
|
956
|
+
MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
|
|
957
|
+
|
|
958
|
+
# quote column names same as that of the Teradata reserved keywords.
|
|
959
|
+
quote_column_name = [UtilFuncs._process_for_teradata_keyword(col) for col in column_names]
|
|
960
|
+
|
|
961
|
+
# Actual columns should be retained if "drop_columns" is set to False.
|
|
962
|
+
if kwargs.get("drop_columns") is False:
|
|
963
|
+
valid_columns = quote_column_name + valid_columns
|
|
964
|
+
new_column_names = column_names + new_column_names
|
|
965
|
+
new_column_types = column_types + new_column_types
|
|
966
|
+
|
|
967
|
+
aggregate_expr = ", ".join(valid_columns)
|
|
968
|
+
return aggregate_expr, new_column_names, new_column_types
|
|
969
|
+
|
|
970
|
+
    @staticmethod
    def _construct_sql_expression_for_aggregations_for_agg(df, column_names, column_types, func, percentile=.5,
                                                           describe_op=False, **kwargs):
        """
        Internal function to create and return the sql expression
        corresponding to given operation, given column_names and
        column_types.

        Column_types are used to check whether all the datatypes are
        valid types for given operation and throw exception if they
        are not.

        PARAMETERS:
            df:
                Required Argument.
                Specifies teradataml DataFrame which is to be used to get the desired
                aggregate column expression.
                Types: teradataml DataFrame

            column_names:
                Required Argument.
                Specifies the column names for which desired aggregate operation is
                to be executed.
                Types: List of strings

            column_types:
                Required Argument.
                Specifies the respective column types for column names.
                Types: List of teradatasqlalchemy types

            func:
                Required Argument.
                Specifies the aggregate function(s) to be applied on teradataml
                DataFrame columns.
                Types: string, dictionary or list of strings/functions.
                Accepted combinations are:
                    1. String function name
                    2. List of functions
                    3. Dictionary containing column name as key and aggregate
                       function name (string or list of strings) as value
                    4. ColumnExpression built using the aggregate functions.
                    5. List of ColumnExpressions built using the aggregate functions.

            percentile:
                Optional Argument.
                Specifies a value between 0 and 1 that can only be used with func = 'percentile'.
                The default is .5, which returns the 50th percentile.
                Types: float

            describe_op:
                Optional Argument.
                Specifies a boolean flag that decides whether the aggregate operation being
                performed is for DataFrame.describe() or not.
                Types: bool

            kwargs:
                Specifies miscellaneous keyword arguments that can be passed to aggregate functions.

        RETURNS:
            a) sql expression, such as
               1. 'min(col1) as min_col1, min(col2) as min_col2' if
                  col1 and col2 are the columns in the DataFrame and
                  the operation is 'min'
               2. 'max(col1) as max_col1, max(col2) as max_col2' if
                  col1 and col2 are the columns in the DataFrame and
                  the operation is 'max'
               3. 'min(col1) as min_col1, stddev_samp(col2) as
                  std_col2' if col1, col2 are the columns in the
                  DataFrame and the operations are min, std.
               etc...
            b) new columns' names (eg min_col1, min_col2 ...)
            c) new columns' types

        RAISES:
            TeradataMLException
            1. TDMLDF_AGGREGATE_COMBINED_ERR - If the provided
               aggregate operations do not support specified columns.

               Possible Value:
               No results. Below is/are the error message(s):
               All selected columns [(col1 - VARCHAR)] is/are
               unsupported for 'sum' operation.

            2. TDMLDF_INVALID_AGGREGATE_OPERATION - If the aggregate
               operation(s) received in parameter 'func' is/are
               invalid.

               Possible Value:
               Invalid aggregate operation(s): minimum, counter.
               Valid aggregate operation(s): count, max, mean, min,
               std, sum.

            3. TDMLDF_AGGREGATE_INVALID_COLUMN - If any of the columns
               specified in func is not present in the dataframe.

               Possible Value:
               Invalid column(s) given in parameter func: col1.
               Valid column(s): A, B, C, D.

        EXAMPLES:
            col_names, col_types = \
                df_utils._get_column_names_and_types_from_metaexpr(
                    self._metaexpr)
            expr, new_col_names, new_col_types = \
                df_utils._construct_sql_expression_for_aggregations_for_agg(
                    df, col_names, col_types, 'min')

            expr1, new_col_names1, new_col_types1 = \
                df_utils._construct_sql_expression_for_aggregations_for_agg(
                    df, col_names, col_types, ['min', 'sum'])

            expr2, new_col_names2, new_col_types2 = \
                df_utils._construct_sql_expression_for_aggregations_for_agg(
                    df, col_names, col_types, {'col1': ['min', 'sum'],
                                               'col2': 'mean'})

        """
        # If the function is a time series aggregate, we process the aggregation differently.
        # Also, one is not supposed to pass the below time series aggregates to DataFrame.agg():
        # ['bottom', 'bottom with ties', 'delta_t', 'mad', 'top', 'top with ties']
        # Thus, no extra processing is required for time series aggregates over here.

        if isinstance(func, ColumnExpression) or (isinstance(func, list) and isinstance(func[0], ColumnExpression)):
            column_agg_expr = []
            new_column_names = []
            new_column_types = []
            if isinstance(func, ColumnExpression):
                func = UtilFuncs._as_list(func)

            # Validate that func is a list of ColumnExpressions.
            for expr in func:
                if not isinstance(expr, ColumnExpression):
                    raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
                        'func', ['str, dict, ColumnExpression or list of values of type(s): str, ColumnExpression']),
                        MessageCodes.UNSUPPORTED_DATATYPE)

            for operations in func:
                alias = operations.alias_name
                column_agg_expr.append(operations.compile_label(alias))
                new_column_names.append(alias)
                new_column_types.append(operations.type)
            aggregate_expr = ", ".join(column_agg_expr)
            return aggregate_expr, new_column_names, new_column_types

        # 'operations' contains a dict of columns -> list of aggregate operations.
        operations = DataFrameUtils._validate_agg_function(func, column_names)

        all_valid_columns = []
        all_invalid_columns = {}
        all_new_column_names = []
        all_new_column_types = []

        # For each column, the value is True if there is at least one valid operation
        # (an operation on a valid datatype).
        column_supported = {}
        tdp = preparer(td_dialect)
        for column in operations:
            column_supported[column] = False
            valid_columns = []
            invalid_columns = {}
            new_column_names = []
            new_column_types = []
            for operation in operations[column]:
                is_colop_supported, new_col, new_coltype, column_aggr_expr, invalid_column_info = \
                    DataFrameUtils._generate_aggregate_column_expression(df=df, column=column, operation=operation,
                                                                         describe_op=describe_op, percentile=percentile,
                                                                         tdp=tdp, **kwargs)
                if is_colop_supported:
                    column_supported[column] = is_colop_supported
                    new_column_names.append(new_col)
                    new_column_types.append(new_coltype)
                    valid_columns.append(column_aggr_expr)
                else:
                    if operation in invalid_columns:
                        invalid_columns[operation].append(invalid_column_info)
                    else:
                        invalid_columns[operation] = [invalid_column_info]

            all_valid_columns.extend(valid_columns)
            all_new_column_names.extend(new_column_names)
            all_new_column_types.extend(new_column_types)

            for operation in invalid_columns:
                if operation in all_invalid_columns:
                    all_invalid_columns[operation].extend(invalid_columns[operation])
                else:
                    all_invalid_columns[operation] = invalid_columns[operation]

        unsupported_columns = [col for col in column_supported if not column_supported[col]]
        unsupported_columns.sort()  # Helps in reporting the columns in lexicographic order.

        error_msgs = []
        for operation in sorted(all_invalid_columns):
            all_invalid_columns[operation].sort()  # Helps in reporting the columns in lexicographic order.
            error = MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED.value.format(
                ", ".join(all_invalid_columns[operation]), operation)
            error_msgs.append(error)

        if not all(column_supported[oper] for oper in column_supported):
            new_msg = MessageCodes.TDMLDF_AGGREGATE_AGG_DICT_ERR.value.format(", ".join(unsupported_columns))
            error_msgs.append(new_msg)
            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs))
            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)

        elif len(all_valid_columns) == 0:  # No supported columns in the given list of columns.
            raise TeradataMlException(Messages.get_message(
                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR).format("\n".join(error_msgs)),
                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)

        aggregate_expr = ", ".join(all_valid_columns)
        return aggregate_expr, all_new_column_names, all_new_column_types

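    # Illustrative sketch (hypothetical columns; not part of the module): with the
    # dict form of "func" and the <op>_<col> alias pattern shown in the docstring,
    #   expr, names, types = df_utils._construct_sql_expression_for_aggregations_for_agg(
    #       df, ['gpa', 'id'], [FLOAT(), INTEGER()], {'gpa': ['min', 'max'], 'id': 'count'})
    # would yield
    #   expr  -> "min(gpa) as min_gpa, max(gpa) as max_gpa, count(id) as count_id"
    #   names -> ['min_gpa', 'max_gpa', 'count_id']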
    @staticmethod
    def _construct_sql_expression_for_time_series_aggregations(df, column_names, column_types, func, **kwargs):
        """
        Internal function to create and return the sql expression
        corresponding to given time series function, given column_names and
        column_types.

        Column_types are used to check whether all the datatypes are
        valid types for given operation and throw exception if they
        are not.

        NOTE:
            This function should be used only for time series aggregates.

        PARAMETERS:
            df:
                Required Argument.
                Specifies teradataml DataFrame which is to be used to get the desired
                aggregate column expression.
                Types: teradataml DataFrame

            column_names:
                Required Argument.
                Specifies the column names for which desired aggregate operation is
                to be executed.
                Types: List of strings

            column_types:
                Required Argument.
                Specifies the respective column types for column names.
                Types: List of teradatasqlalchemy types

            func:
                Required Argument.
                Specifies the aggregate function(s) to be applied on teradataml
                DataFrame columns. For time series aggregates it is usually a string.
                Types: str

            kwargs:
                Specifies miscellaneous keyword arguments that can be passed to aggregate functions.

        RETURNS:
            a) sql expression, such as
               1. 'bottom(2, "col1") as "bottom2col1"' if
                  col1 is a column in the DataFrame and
                  the operation is 'bottom'
               etc...
            b) new columns' names (eg bottom2col1 ...)
            c) new columns' types

        RAISES:
            None.

        EXAMPLES:
            colname_to_numvalues = {"col1": 2, "col2": 3}
            kwargs = {"colname_to_numvalues": colname_to_numvalues}
            aggregate_expr, column_names, column_types = \
                df_utils._construct_sql_expression_for_time_series_aggregations(df, column_names, column_types,
                                                                                func, **kwargs)

        """

        # eg of column_types: [VARCHAR(length=13), INTEGER(), VARCHAR(length=60), VARCHAR(length=5),
        # FLOAT(precision=0)]

        # eg of types of each column are <class 'teradatasqlalchemy.types.VARCHAR'>,
        # <class 'teradatasqlalchemy.types.INTEGER'>, <class 'teradatasqlalchemy.types.FLOAT'>,
        # <class 'teradatasqlalchemy.types.INTERVAL_MINUTE_TO_SECOND'> etc..

        col_names_and_types = dict(zip(column_names, column_types))
        tdp = preparer(td_dialect)

        select_columns = []
        new_column_names = []
        new_column_types = []
        if func in ["bottom", "bottom with ties", "top", "top with ties"]:
            # Processing for bottom and top.
            # Function name to be used in column aliasing.
            column_alias_func = func.replace(" ", "_")
            bottom_col_val = kwargs["colname_to_numvalues"]
            for column in sorted(list(bottom_col_val.keys())):
                new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
                quoted_parent_column_name = tdp.quote("{0}".format(column))
                quoted_new_column_name = tdp.quote(new_col_name)
                select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
                                                                    quoted_parent_column_name, quoted_new_column_name))
                new_column_names.append(new_col_name)
                new_column_types.append(col_names_and_types[column])

        if func == "delta_t":
            # Argument processing for DELTA-T.
            new_column_names.append("delta_t_td_timecode")
            quoted_new_column_name = tdp.quote(new_column_names[0])
            new_column_types.append(PERIOD_TIMESTAMP)
            select_columns.append("{0}((WHERE {1}), (WHERE {2})) as {3}".format(func, kwargs["start_condition"],
                                                                                kwargs["end_condition"],
                                                                                quoted_new_column_name))

        if func == 'mad':
            # Processing for Median Absolute Deviation.
            # Function name to be used in column aliasing.
            column_alias_func = func.replace(" ", "_")
            bottom_col_val = kwargs["colname_to_numvalues"]
            for column in sorted(list(bottom_col_val.keys())):
                new_col_name = "{2}{0}{1}".format(bottom_col_val[column], column, column_alias_func)
                quoted_parent_column_name = tdp.quote("{0}".format(column))
                quoted_new_column_name = tdp.quote(new_col_name)
                select_columns.append("{0}({1}, {2}) as {3}".format(func, bottom_col_val[column],
                                                                    quoted_parent_column_name, quoted_new_column_name))
                new_column_names.append(new_col_name)
                if type(col_names_and_types[column]) in [DECIMAL, NUMBER]:
                    # If the column type is DECIMAL or NUMBER, then the output column type
                    # should be the same. Otherwise, it is FLOAT.
                    new_column_types.append(col_names_and_types[column])
                else:
                    new_column_types.append(FLOAT())

        if "default_constant_for_columns" in kwargs:
            column_names = kwargs["default_constant_for_columns"]
            column_types = [col_names_and_types[column] for column in column_names]
            if len(column_names) > 0:
                aggregate_expr, all_new_column_names, all_new_column_types = \
                    DataFrameUtils._construct_sql_expression_for_aggregations(df=df, column_names=column_names,
                                                                              column_types=column_types, func=func)
                aggregate_expr_default_column_list = [col.strip() for col in aggregate_expr.split(",")]
                select_columns = select_columns + aggregate_expr_default_column_list
                new_column_names = new_column_names + all_new_column_names
                new_column_types = new_column_types + all_new_column_types

        aggregate_expr = ", ".join(select_columns)
        return aggregate_expr, new_column_names, new_column_types

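    # Illustrative sketch: for a hypothetical call with func="bottom" and
    # kwargs={"colname_to_numvalues": {"col1": 2}}, the loop above builds,
    # per the docstring:
    #   select_columns   -> ['bottom(2, "col1") as "bottom2col1"']
    #   new_column_names -> ['bottom2col1']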
    @staticmethod
    def _validate_describe_columns(columns, metaexpr, groupby_column_list):
        """
        Internal function to validate whether the columns provided to describe() are correct or not,
        when the DataFrame is the output of groupby or groupby_time.

        PARAMETERS:
            columns:
                Optional Argument.
                Specifies the name(s) of columns we are collecting statistics for.
                Types: str or List of strings (str)
            metaexpr:
                Required Argument.
                Specifies the meta expression for the dataframe.
                Types: _MetaExpression
            groupby_column_list:
                Optional Argument.
                Specifies the group by columns for the dataframe.
                Default Values: None.
                Types: str or List of strings (str)

        RETURNS:
            None

        RAISES:
            TeradataMLException
        """
        invalid_columns = [_column for _column in groupby_column_list if _column in columns]
        if len(invalid_columns) > 0:
            all_columns = [col.name for col in metaexpr.c]
            valid_columns = [item for item in all_columns if item not in groupby_column_list]
            msg = Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN). \
                format(", ".join(invalid_columns), 'columns', ", ".join(valid_columns))
            raise TeradataMlException(msg, MessageCodes.TDMLDF_AGGREGATE_INVALID_COLUMN)

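    # Illustrative sketch (hypothetical names): if a DataFrame was grouped by
    # "accounts", asking describe() to also summarize "accounts" is rejected:
    #   DataFrameUtils._validate_describe_columns(['accounts', 'Feb'], df._metaexpr, ['accounts'])
    #   # -> raises TeradataMlException (TDMLDF_AGGREGATE_INVALID_COLUMN)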
    @staticmethod
    def _construct_describe_query(df, columns, metaexpr, percentiles, function_label, groupby_column_list=None,
                                  include=None, is_time_series_aggregate=False, verbose=False, distinct=False,
                                  statistics=None, **kwargs):
        """
        Internal function to create the sql query for describe().

        PARAMETERS:
            df:
                Required Argument.
                Specifies teradataml DataFrame we are collecting statistics for.
                Types: teradataml DataFrame

            columns:
                Optional Argument.
                Specifies the name(s) of columns we are collecting statistics for.
                Types: str or List of strings (str)

            metaexpr:
                Required Argument.
                Specifies the meta expression for the dataframe.
                Types: _MetaExpression

            percentiles:
                Required Argument.
                Specifies a list of values between 0 and 1.
                Types: List of floats

            function_label:
                Required Argument.
                Specifies a string value used as the label for the aggregate function column.
                Types: str

            groupby_column_list:
                Optional Argument.
                Specifies the group by columns for the dataframe.
                Default Values: None.
                Types: str or List of strings (str)

            include:
                Optional Argument.
                Specifies a string that must be "all" or None. If "all", then all columns will be included.
                Otherwise, only numeric columns are used for collecting statistics.
                Default Values: None.
                Types: str

            is_time_series_aggregate:
                Optional Argument.
                Specifies a flag stating whether the describe operation is a time series aggregate or not.
                Default Values: False.
                Types: bool

            verbose:
                Optional Argument.
                Specifies a flag stating whether the DESCRIBE VERBOSE option for time series aggregates is to be
                performed or not.
                Default Values: False.
                Types: bool

            distinct:
                Optional Argument.
                Specifies a flag that decides whether to consider duplicate rows in calculation or not.
                Default Values: False
                Types: bool

            statistics:
                Optional Argument.
                Specifies an explicit list of aggregate operations to run instead of the default set.
                Used only when "include" is None.
                Default Values: None
                Types: list of str

            kwargs:
                Optional Arguments.
                Keyword arguments for time series aggregate functions.


        RETURNS:
            A SQL query like:
            select 'count' as "func", cast(count("Feb") as Number) as "Feb", cast(count(accounts) as Number) as accounts from "PYUSER"."salesview"
            union all
            select 'mean' as "func", cast(avg("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
            union all
            select 'std' as "func", cast(stddev_samp("Feb") as Number) as "Feb", null as accounts from "PYUSER"."salesview"
            union all
            select 'min' as "func", cast(min("Feb") as Number) as "Feb", cast(min(accounts) as Number) as accounts from "PYUSER"."salesview"
            union all
            select '25%' as "func", percentile_cont(0.25) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
            union all
            select '50%' as "func", percentile_cont(0.5) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
            union all
            select '75%' as "func", percentile_cont(0.75) within group(order by cast("Feb" as Number) ) as "Feb", null as accounts from "PYUSER"."salesview"
            union all
            select 'max' as "func", cast(max("Feb") as Number) as "Feb", cast(max(accounts) as Number) as accounts from "PYUSER"."salesview"

        RAISES:
            TeradataMLException

        EXAMPLES:
            agg_query = \
                df_utils._construct_describe_query(df, columns, df._metaexpr, [.25, .5, .75], "func", groupby_column_list)
            agg_query = \
                df_utils._construct_describe_query(df, columns, df._metaexpr, [.3, .6], "func", groupby_column_list, include="all")

        """
        table_name = df._table_name
        operators = ["count", "mean", "std", "min", "percentile", "max"]
        all_operators = ["count", "unique", "mean", "std", "min", "percentile", "max"]

        if is_time_series_aggregate and verbose:
            # Time series aggregate operators for the Vantage DESCRIBE function with verbose.
            operators = ['max', 'mean', 'median', 'min', 'mode', "percentile", 'std']
        elif is_time_series_aggregate and not verbose:
            # Time series aggregate operators for the Vantage DESCRIBE function.
            operators = ['max', 'mean', 'min', 'std']

        col_names = []
        col_types = []
        sel_agg_stmts = []
        tdp = preparer(td_dialect)
        quoted_function_label = tdp.quote(function_label)

        if include == 'all' and not is_time_series_aggregate:
            operators = all_operators

        if include is None and statistics is not None:
            operators = statistics

        table_name, sel_groupby, groupby = DataFrameUtils()._process_groupby_clause(table_name, groupby_column_list,
                                                                                    is_time_series_aggregate, **kwargs)

        for col in metaexpr.c:
            if (include is None and type(col.type) in UtilFuncs()._get_numeric_datatypes()) or include == 'all' or statistics is not None:
                if not (groupby is not None and col.name in groupby_column_list):
                    if columns is None or col.name in columns:
                        col_names.append(col.name)
                        col_types.append(col.type)

        if len(col_names) == 0:
            raise TeradataMlException(
                Messages.get_message(MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR,
                                     "The DataFrame does not contain numeric columns"),
                MessageCodes.TDMLDF_AGGREGATE_COMBINED_ERR)
        for op in operators:
            if op == "percentile":
                for p in percentiles:
                    agg_expr, new_col_names, new_col_types = \
                        DataFrameUtils._construct_sql_expression_for_aggregations(df,
                            col_names, col_types, op, percentile=p, describe_op=True, distinct=distinct,
                            as_time_series_aggregate=is_time_series_aggregate)
                    sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}%' as varchar(6)) as \"{1}\", {2} from {3} ".format(
                        int(p*100), quoted_function_label, agg_expr, table_name, sel_groupby))
            else:
                agg_expr, new_col_names, new_col_types = \
                    DataFrameUtils._construct_sql_expression_for_aggregations(df,
                        col_names, col_types, op, describe_op=True, distinct=distinct,
                        as_time_series_aggregate=is_time_series_aggregate)
                sel_agg_stmts.append("SELECT \n\t{4} \n\tcast('{0}' as varchar(6)) as \"{1}\", \n\t{2} \nfrom \n\t{3} ".format(
                    op, quoted_function_label, agg_expr, table_name, sel_groupby))
        return " \nunion all\n ".join(sel_agg_stmts)

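    # Illustrative sketch: the loop above emits one SELECT per operator (plus one per
    # requested percentile) and joins them with UNION ALL, so the final query has
    # roughly the shape shown in the RETURNS section of the docstring, e.g.
    #   SELECT cast('min' as varchar(6)) as "func", cast(min("Feb") as Number) as "Feb", ... from <table>
    #   union all
    #   SELECT cast('25%' as varchar(6)) as "func", percentile_cont(0.25) within group(...) as "Feb", ... from <table>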
    @staticmethod
    def _process_groupby_clause(table_name, groupby_column_list, is_time_series_aggregate, **kwargs):
        """
        Internal function used to process and generate the GROUP BY or GROUP BY TIME clause required for
        the query to be run for a describe operation.

        PARAMETERS:
            table_name:
                Required Argument.
                Specifies the table name to be used for forming the describe query.
                Types: str

            groupby_column_list:
                Required Argument.
                Specifies the list of column names involved in the Group By.
                Types: List of Strings.

            is_time_series_aggregate:
                Required Argument.
                Specifies a boolean stating whether the GROUP BY clause to be formed is for a
                time series aggregate or not.
                Types: bool

            kwargs:
                Optional Arguments.
                Keyword arguments for time series aggregate functions.

        RETURNS:
            1. Table name appended with the GROUP BY clause.
            2. Column projection string for the GROUP BY columns.
            3. The GROUP BY clause itself.

        RAISES:
            None.

        EXAMPLES:
            table_name, sel_groupby, groupby = DataFrameUtils()._process_groupby_clause(table_name, groupby_column_list,
                                                                                        is_time_series_aggregate, **kwargs)

        """
        sel_groupby = ""
        grp_by_clause = None

        if is_time_series_aggregate:
            # For time series aggregates, timebucket_duration is mandatory, so it will always be present in kwargs.
            grp_by_clause = "GROUP BY TIME ({0}".format(kwargs['timebucket_duration'])

            # Add columns in the value expression to GROUP BY TIME.
            if 'value_expression' in kwargs and \
                    kwargs['value_expression'] is not None and \
                    len(kwargs['value_expression']) > 0:
                grp_by_clause = "{0} and {1}".format(grp_by_clause, ", ".join(kwargs['value_expression']))

            # Complete the parenthesis for GROUP BY TIME.
            grp_by_clause = "{0})".format(grp_by_clause)

            # Add timecode column information.
            if 'timecode_column' in kwargs and \
                    kwargs['timecode_column'] is not None and \
                    len(kwargs['timecode_column']) > 0:
                if 'sequence_column' in kwargs and \
                        kwargs['sequence_column'] is not None and \
                        len(kwargs['sequence_column']) > 0:
                    grp_by_clause = "{0} USING TIMECODE({1}, {2})".format(grp_by_clause, kwargs['timecode_column'],
                                                                          kwargs['sequence_column'])
                else:
                    grp_by_clause = "{0} USING TIMECODE({1})".format(grp_by_clause, kwargs['timecode_column'])

            # Add FILL information.
            if 'fill' in kwargs and kwargs['fill'] is not None and len(kwargs['fill']) > 0:
                grp_by_clause = "{0} FILL({1})".format(grp_by_clause, kwargs['fill'])

        else:
            if groupby_column_list is not None:
                grp_by_clause = "GROUP BY {0}".format(",".join(groupby_column_list))

        if grp_by_clause is not None:
            table_name = "{0} \n{1}".format(table_name, grp_by_clause)
            tdp = preparer(td_dialect)
            for g in groupby_column_list:
                if is_time_series_aggregate:
                    if g == "TIMECODE_RANGE":
                        g = "$TD_TIMECODE_RANGE"

                    if "GROUP BY TIME" in g:
                        g = "$TD_GROUP_BY_TIME"

                quoted_name = tdp.quote(g)
                sel_groupby += "{0}, ".format(quoted_name)

        return table_name, sel_groupby, grp_by_clause

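    # Illustrative sketch (hypothetical kwargs): for a time series describe with
    #   kwargs = {'timebucket_duration': 'MINUTES(2)', 'timecode_column': 'ts'}
    # the clause built above would read:
    #   GROUP BY TIME (MINUTES(2)) USING TIMECODE(ts)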
    @staticmethod
    def _get_column_names_and_types_from_metaexpr(metaexpr):
        """
        Internal function to return column names and respective types
        given _metaexpr.

        PARAMETERS:
            metaexpr:
                Required Argument.
                Dataframe's metaexpr. It is used to get column names and types.
                Types: MetaExpression

        RETURNS:
            Two lists - one for column names and another for column types

        RAISES:
            None

        EXAMPLES:
            dfUtils._get_column_names_and_types_from_metaexpr(
                df._metaexpr)
        """
        # Constructing new column names & types for selected columns ONLY, using the parent _metaexpr.
        col_names = []
        col_types = []
        for c in metaexpr.c:
            col_names.append(c.name)
            col_types.append(c.type)

        return col_names, col_types

    @staticmethod
    def _insert_all_from_table(to_table_name, from_table_name, column_list, to_schema_name=None,
                               from_schema_name=None, temporary=False):
        """
        Inserts all records from one table into the second, using columns ordered by the column list.

        PARAMETERS:
            to_table_name - String specifying name of the SQL Table to insert to.
            from_table_name - String specifying name of the SQL Table to insert from.
            column_list - List of strings specifying column names used in the insertion.
            to_schema_name - Name of the database schema to insert table data into.
            from_schema_name - Name of the database schema to insert table data from.
            temporary - Specifies whether to create Vantage tables as permanent or volatile.
                        Default: False
                        Note: When True:
                            1. volatile tables are created, and
                            2. schema_name is ignored.
                        When False, permanent tables are created.
        RETURNS:
            None

        RAISES:
            Database error if an error occurred while executing the insert command.

        EXAMPLES:
            df_utils._insert_all_from_table('table1_name', 'table2_name', ['col1', 'col2', 'col3'])
        """
        tdp = preparer(td_dialect)

        # Construct the INSERT command.
        column_order_string = ', '.join([tdp.quote("{0}".format(element)) for element in column_list])

        # Generate the full name of the destination table.
        if to_schema_name:
            full_to_table_name = tdp.quote(to_schema_name) + "." + tdp.quote(to_table_name)
        elif temporary:
            full_to_table_name = tdp.quote(to_table_name)
        else:
            full_to_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
                to_table_name)

        # Generate the full name of the source table.
        if from_schema_name:
            full_from_table_name = tdp.quote(from_schema_name) + "." + tdp.quote(from_table_name)
        else:
            full_from_table_name = tdp.quote(_get_current_databasename()) + "." + tdp.quote(
                from_table_name)

        insert_sql = SQLBundle._build_insert_from_table_query(full_to_table_name,
                                                              full_from_table_name,
                                                              column_order_string)
        # Execute the INSERT command.
        return UtilFuncs._execute_ddl_statement(insert_sql)

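    # Illustrative sketch: assuming SQLBundle builds the usual INSERT ... SELECT form,
    # the statement executed for the EXAMPLES call above would look roughly like
    #   INSERT INTO "db"."table1_name" ("col1", "col2", "col3")
    #       SELECT "col1", "col2", "col3" FROM "db"."table2_name"
    # where "db" is the current database; the exact text comes from
    # SQLBundle._build_insert_from_table_query().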
    @staticmethod
    def _dataframe_has_column(data, column):
        """
        Function to check whether the given column name is present in the given dataframe or not.
        This function is currently used only for Analytics wrappers.

        PARAMETERS:
            data - teradataml DataFrame to check against for column existence.
            column - Column name (a string).

        RAISES:
            None

        EXAMPLES:
            DataFrameUtils._dataframe_has_column(data, col)
        """
        return column in [c.name for c in data._metaexpr.c]

    @staticmethod
    def _get_row_count(table_name):
        """
        Function to return the row count of a teradataml DataFrame.
        This function is currently used to determine the shape/size of a dataframe.

        PARAMETERS:
            table_name - Name of the table to get the row count for.

        RAISES:
            TeradataMlException (TDMLDF_INFO_ERROR)

        EXAMPLES:
            DataFrameUtils._get_row_count(table_name)
        """
        # Construct the COUNT(*) query.
        try:
            row_count_query = SQLBundle._build_nrows_print_query(table_name)
            res = execute_sql(row_count_query)
            return res.fetchone()[0]

        except TeradataMlException:
            raise

        except Exception as err:
            # TODO: Better handle the level of information being presented to the user with logging.
            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
                                      MessageCodes.TDMLDF_INFO_ERROR) from err

    @staticmethod
    def _get_scalar_value(table_name):
        """
        Function to return the only 1x1 (scalar) value from a teradataml DataFrame.

        PARAMETERS:
            table_name - Name of the table to get the value from.

        RAISES:
            TeradataMlException (TDMLDF_INFO_ERROR)

        EXAMPLES:
            DataFrameUtils._get_scalar_value(table_name)
        """
        # Construct the base query.
        try:
            select_query = SQLBundle._build_base_query(table_name)
            res = execute_sql(select_query)
            return res.fetchone()[0]

        except TeradataMlException:
            raise

        except Exception as err:
            # TODO: Better handle the level of information being presented to the user with logging.
            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR) + str(err),
                                      MessageCodes.TDMLDF_INFO_ERROR) from err

    @staticmethod
    def _get_sorted_nrow(df, n, sort_col, asc=True):
        """
        Internal utility function that returns a teradataml DataFrame containing n rows
        of the DataFrame. The DataFrame is sorted on the index column, or the first column
        if there is no index column.

        PARAMETERS:
            df: teradataml DataFrame
            n: Specifies the number of rows to select.
               Type: int
            sort_col: The column to sort on.
               Type: str
            asc: (optional) - Specifies the sort order.
               If True, sort in ascending order.
               If False, sort in descending order.
               The default value is True.
               Type: boolean

        RETURNS:
            teradataml DataFrame

        EXAMPLES:
            DataFrameUtils._get_sorted_nrow(df, 10)
            DataFrameUtils._get_sorted_nrow(df, 20, asc=True)
            DataFrameUtils._get_sorted_nrow(df, 30, asc=False)

        """
        # TODO: implement and use this in teradatasqlalchemy.
        tdp = preparer(td_dialect)
        aed_utils = AedUtils()

        sort_order = "asc"
        if not asc:
            sort_order = "desc"

        quoted_cols = [tdp.quote(c) for c in df.columns]
        sel_cols_str = ",".join(quoted_cols)
        sel_row_num = "row_number() over (order by \"{0}\" {1}) - 1 as tdml_row_num, {2}".format(sort_col, sort_order, sel_cols_str)
        filter_str = "tdml_row_num < {0}".format(n)
        sel_nodeid = aed_utils._aed_select(df._nodeid, sel_row_num)
        fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
        sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
        col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types),
                                                            datalake=df._metaexpr.datalake)
        # Call _from_node from the appropriate class, either DataFrame or GeoDataFrame.
        new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
        new_df._orderby = df._orderby
        new_df._metaexpr._n_rows = n
        return new_df

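    # Illustrative sketch: the three AED nodes above correspond to a nested query of
    # roughly this shape (for n=10, sort_col="id", ascending):
    #   SELECT <cols> FROM (
    #       SELECT row_number() over (order by "id" asc) - 1 as tdml_row_num, <cols>
    #       FROM <source>) AS t
    #   WHERE tdml_row_num < 10
    # The exact SQL is generated by the AED layer, so this is only the general shape.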
    @staticmethod
    def _get_database_names(connection, schema_name):
        """
        Function to return a list of valid database names for a given sqlalchemy connection.
        This function is used to determine whether the database used is valid in user APIs such as copy_to_sql.

        PARAMETERS:
            connection: Required Argument.
                        A SQLAlchemy connection object.

            schema_name: Required Argument.
                         String specifying the requested schema name.

        RAISES:
            TeradataMlException (TDMLDF_INFO_ERROR)

        EXAMPLES:
            DataFrameUtils._get_database_names(get_connection(), schema_name)
        """
        # TODO: implement and use this in teradatasqlalchemy.
        table_obj = table('databasesV', column('databasename'), schema='dbc')
        stmt = select(text(str(func.lower(table_obj.c.databasename)) + ' as databasename')).where(
            text('databasename (NOT CASESPECIFIC) = {} (NOT CASESPECIFIC)'.format(':schema_name')))
        stmt = text(str(stmt))
        stmt = stmt.bindparams(schema_name=schema_name)
        res = connection.execute(stmt).fetchall()
        return [name.databasename for name in res]

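    # Illustrative sketch: the statement assembled above renders to SQL along the lines of
    #   SELECT lower(databasename) as databasename
    #   FROM dbc."databasesV"
    #   WHERE databasename (NOT CASESPECIFIC) = :schema_name (NOT CASESPECIFIC)
    # with :schema_name bound to the requested schema; the exact text depends on the
    # SQLAlchemy version in use.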
    @staticmethod
    def _get_common_parent_df_from_dataframes(dfs):
        """
        Internal function to return the common parent dataframe from a given list of dataframes.
        """
        from teradataml import DataFrame, in_schema
        aed_utils = AedUtils()
        if len(dfs) == 1:
            operation = aed_utils._aed_get_node_query_type(dfs[0]._nodeid)
            if operation in ["table", "assign"]:
                # Assign might have removed some columns; with only one dataframe,
                # return that same dataframe.
                # Also return the same dataframe if it is a DataFrame object created from a table.
                return dfs[0]

            # If it is a select node or any other node, then get the parent node and execute it.
            pids = aed_utils._aed_get_parent_nodeids(dfs[0]._nodeid)
            if not aed_utils._aed_is_node_executed(pids[0]):
                _ = DataFrameUtils._execute_node_return_db_object_name(pids[0])

            tab_name_first = aed_utils._aed_get_source_tablename(pids[0])

            db_schema = UtilFuncs._extract_db_name(tab_name_first)
            db_table_name = UtilFuncs._extract_table_name(tab_name_first)
            if dfs[0]._metaexpr.datalake:
                return DataFrame(in_schema(db_schema, db_table_name, dfs[0]._metaexpr.datalake))

            if db_schema:
                return DataFrame(in_schema(db_schema, db_table_name))

            return DataFrame(db_table_name)

        pids_first = None
        parent_df = None
        for i in range(len(dfs)):
            pids = aed_utils._aed_get_parent_nodeids(dfs[i]._nodeid)

            if parent_df is None:
                if not aed_utils._aed_is_node_executed(pids[0]):
                    _ = DataFrameUtils._execute_node_return_db_object_name(pids[0])

                tab_name_first = aed_utils._aed_get_source_tablename(pids[0])

                db_schema = UtilFuncs._extract_db_name(tab_name_first)
                db_table_name = UtilFuncs._extract_table_name(tab_name_first)

                if dfs[i]._metaexpr.datalake:
                    parent_df = DataFrame(in_schema(db_schema, db_table_name, dfs[i]._metaexpr.datalake))
                elif db_schema:
                    parent_df = DataFrame(in_schema(db_schema, db_table_name))
                else:
                    parent_df = DataFrame(db_table_name)
                pids_first = pids
            else:
                if pids_first != pids:
                    raise TeradataMlException(Messages.get_message(MessageCodes.DFS_NO_COMMON_PARENT),
                                              MessageCodes.DFS_NO_COMMON_PARENT)

        return parent_df

    @staticmethod
    def _get_sqlalchemy_type_from_str(td_type):
        """
        Function to get the teradatasqlalchemy type from the string representation of that type.

        PARAMETERS:
            td_type:
                Required Argument.
                Specifies the string representation of a teradatasqlalchemy type.
                Types: str

        RAISES:
            ValueError

        EXAMPLES:
            >>> dt = DataFrameUtils._get_sqlalchemy_type_from_str("DECIMAL(4,4)")
            >>> dt
            DECIMAL(precision=4, scale=4)
            >>> type(dt)
            teradatasqlalchemy.types.DECIMAL

            >>> dt = DataFrameUtils._get_sqlalchemy_type_from_str("VARCHAR(32000) CHARACTER SET UNICODE")
            >>> dt
            VARCHAR(length=32000, charset='UNICODE')
            >>> type(dt)
            teradatasqlalchemy.types.VARCHAR
        """
        # 4 groups of pattern:
        # 1. Type name
        # 2. Comma separated parameters enclosed in parentheses
        # 3. Comma separated parameters without parentheses
        # 4. Remaining string
        pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"

        m = re.match(pattern, td_type)
        # Check for a match before accessing its groups.
        if m is None or m.group(1) not in _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER:
            raise ValueError("Invalid Teradata type: {} from datalake".format(td_type))

        td_str_type = m.group(1)
        td_str_params = m.group(3)
        td_str_remain = m.group(4)

        if td_str_type in ["VARCHAR", "CHAR"]:
            # If VARCHAR or CHAR, extract length and charset from the string.
            length = int(td_str_params.split(",")[0])
            charset = td_str_remain.strip().split(" ")[2]
            return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]\
                (length=length, charset=charset)

        if td_str_type in ["BLOB"]:
            # Ignoring the charset as BLOB does not have one.
            # If BLOB, extract length from the string.
            length = int(td_str_params.split(",")[0])
            return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]\
                (length=length)

        if td_str_type in ["DECIMAL"]:
            # If DECIMAL, extract precision and scale from the string.
            args = td_str_params.split(",")
            return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]\
                (precision=int(args[0]), scale=int(args[1]))

        # TODO: Test for other data types once the OTF team finalizes all data types.
        return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]()

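    # Illustrative sketch: for td_type = "VARCHAR(32000) CHARACTER SET UNICODE" the
    # regex above captures
    #   group(1) -> 'VARCHAR'                 (type name)
    #   group(3) -> '32000'                   (parameters inside the parentheses)
    #   group(4) -> ' CHARACTER SET UNICODE'  (remaining string, yielding charset 'UNICODE')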
    @staticmethod
    def _get_datalake_table_columns_info(schema, table_name, datalake, use_dialect=False):
        """
        Function to get the column names and corresponding teradatasqlalchemy types
        of a datalake table using the results of the
        'help table <datalake>.<db_name>.<table_name>' SQL query.

        PARAMETERS:
            schema:
                Required Argument.
                Specifies the name of the schema.
                Types: str

            table_name:
                Required Argument.
                Specifies the name of the table.
                Types: str

            datalake:
                Required Argument.
                Specifies the name of the datalake.
                Types: str

            use_dialect:
                Optional Argument.
                When True, the column information is retrieved through the SQLAlchemy
                dialect's get_columns() instead of the HELP TABLE query.
                Default Values: False
                Types: bool

        RAISES:
            TeradataMlException

        EXAMPLES:
            >>> DataFrameUtils._get_datalake_table_columns_info(table_name = 'sales',
            ...                                                 schema='otftestdb',
            ...                                                 datalake='datalake_iceberg_glue')
            (['id', 'masters', 'gpa', 'stats', 'programming', 'admitted'],
            [INTEGER(),
             VARCHAR(length=2000, charset='UNICODE'),
             FLOAT(),
             VARCHAR(length=2000, charset='UNICODE'),
             VARCHAR(length=2000, charset='UNICODE'),
             INTEGER()])
        """
        col_names = []
        col_types = []
        if not use_dialect:
            # Get the column information from the string representations of the types.
            prepared = preparer(td_dialect())
            sqlbundle = SQLBundle()
            full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
                                              prepared.quote(schema),
                                              prepared.quote(table_name))
            help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)

            cur = execute_sql(help_table_sql)
            td_types_col_index = -1

            for i, col_metadata in enumerate(cur.description):
                # HELP TABLE returns column names and the corresponding IcebergType,
                # TeradataInternalType and TeradataType. We need to extract the column
                # index for the 'TeradataType' column.
                if col_metadata[0].lower() in ['teradatatype', 'type']:
                    td_types_col_index = i

            if td_types_col_index > -1:
                for col_info in cur.fetchall():
                    col_names.append(col_info[0])
                    col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
            else:
                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
                                          MessageCodes.TDMLDF_CREATE_FAIL)
        else:
            new_kwarg = get_connection().dialect.__class__.__name__ + "_datalake"
            all_col_info = get_connection().dialect.get_columns(connection=get_connection(),
                                                                table_name=table_name,
                                                                schema=schema,
                                                                table_only=True,
                                                                **{new_kwarg: datalake})
            for col_dict in all_col_info:
                col_names.append(col_dict.get('name', col_dict.get('Column Name')))
                col_types.append(col_dict.get('type', col_dict.get('Type')))

        return col_names, col_types

    @staticmethod
    def check_otf_dataframe():
        """Decorator that validates whether the DataFrame is created on an OTF table, and raises an error if it is not."""
        def decorator(method):
            def wrapper(self, *args, **kwargs):
                if not self._datalake:
                    attr = getattr(type(self), method.__name__, None)
                    caller_name = method.__name__ + '()'
                    if isinstance(attr, property):
                        caller_name = method.__name__
                    raise TeradataMlException(Messages.get_message(MessageCodes.OTF_TABLE_REQUIRED,
                                                                   caller_name),
                                              MessageCodes.UNSUPPORTED_OPERATION)

                return method(self, *args, **kwargs)

            return wrapper

        return decorator

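    # Illustrative usage sketch (hypothetical method name): the factory returns a
    # decorator, so it is applied with a call:
    #
    #   @DataFrameUtils.check_otf_dataframe()
    #   def alter_partition(self, ...):
    #       ...
    #
    # Calling alter_partition() on a DataFrame without a datalake then raises
    # TeradataMlException (OTF_TABLE_REQUIRED).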
    @staticmethod
    def _get_column_info_from_query(query):
        """
        DESCRIPTION:
            Get the column name and type from the SQL query.
            Note:
                This function obtains the SQL statement metadata without executing the query.

        PARAMETERS:
            query:
                Required Argument.
                Specifies the SQL query to analyze.
                Types: str

        RETURNS:
            dict: A dictionary mapping column names to their teradatasqlalchemy types.

        EXAMPLES:
            >>> query = "SELECT * FROM my_table"
            >>> DataFrameUtils._get_column_info_from_query(query)
            {'col1': VARCHAR(length=20, charset='UNICODE'), 'col2': INTEGER()}
        """
        # Get the column metadata by executing the query with the teradata_rpo(S) and
        # teradata_fake_result_sets escape functions.
        cur = execute_sql('{fn teradata_rpo(S)}{fn teradata_fake_result_sets}' + query)
        row = cur.fetchone()

        # When using {fn teradata_rpo(S)}{fn teradata_fake_result_sets}, the result row contains:
        # >>> print([col[0] for col in cur.description])
        # ['NativeSQL', 'RequestNumber', 'StatementNumber', 'ActivityType', 'ActivityCount',
        #  'WarningCode', 'WarningMessage', 'ColumnMetadata', 'ParameterMetadata']
        #
        # Example of row[7] JSON structure:
        # [
        #     {
        #         "Title": "column_name",
        #         "TypeName": "col_type",
        #         "Precision": 0,
        #         "Scale": 0,
        #         ...
        #     }
        # ]
        column_metadata_json = row[7]
        column_metadata = json.loads(column_metadata_json)

        # Extract the column name and get the teradatasqlalchemy type from the column metadata.
        column_info = {}
        for col_info in column_metadata:
            col_name = col_info['Title']
            col_type = _Dtypes._get_td_type_from_metadata(col_info)
            column_info[col_name] = col_type

        return column_info