teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml has been flagged as potentially problematic; see the advisory on the package registry page for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +1935 -1640
- teradataml/__init__.py +70 -60
- teradataml/_version.py +11 -11
- teradataml/analytics/Transformations.py +2995 -2995
- teradataml/analytics/__init__.py +81 -83
- teradataml/analytics/analytic_function_executor.py +2040 -2010
- teradataml/analytics/analytic_query_generator.py +958 -958
- teradataml/analytics/byom/H2OPredict.py +514 -514
- teradataml/analytics/byom/PMMLPredict.py +437 -437
- teradataml/analytics/byom/__init__.py +14 -14
- teradataml/analytics/json_parser/__init__.py +130 -130
- teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
- teradataml/analytics/json_parser/json_store.py +191 -191
- teradataml/analytics/json_parser/metadata.py +1637 -1637
- teradataml/analytics/json_parser/utils.py +798 -803
- teradataml/analytics/meta_class.py +196 -196
- teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
- teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
- teradataml/analytics/sqle/__init__.py +97 -110
- teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
- teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
- teradataml/analytics/table_operator/__init__.py +10 -10
- teradataml/analytics/uaf/__init__.py +63 -63
- teradataml/analytics/utils.py +693 -692
- teradataml/analytics/valib.py +1603 -1600
- teradataml/automl/__init__.py +1683 -0
- teradataml/automl/custom_json_utils.py +1270 -0
- teradataml/automl/data_preparation.py +1011 -0
- teradataml/automl/data_transformation.py +789 -0
- teradataml/automl/feature_engineering.py +1580 -0
- teradataml/automl/feature_exploration.py +554 -0
- teradataml/automl/model_evaluation.py +151 -0
- teradataml/automl/model_training.py +1026 -0
- teradataml/catalog/__init__.py +1 -3
- teradataml/catalog/byom.py +1759 -1716
- teradataml/catalog/function_argument_mapper.py +859 -861
- teradataml/catalog/model_cataloging_utils.py +491 -1510
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +481 -481
- teradataml/common/aed_utils.py +7 -2
- teradataml/common/bulk_exposed_utils.py +111 -111
- teradataml/common/constants.py +1438 -1441
- teradataml/common/deprecations.py +160 -0
- teradataml/common/exceptions.py +73 -73
- teradataml/common/formula.py +742 -742
- teradataml/common/garbagecollector.py +597 -635
- teradataml/common/messagecodes.py +424 -431
- teradataml/common/messages.py +228 -231
- teradataml/common/sqlbundle.py +693 -693
- teradataml/common/td_coltype_code_to_tdtype.py +48 -48
- teradataml/common/utils.py +2424 -2500
- teradataml/common/warnings.py +25 -25
- teradataml/common/wrapper_utils.py +1 -110
- teradataml/config/dummy_file1.cfg +4 -4
- teradataml/config/dummy_file2.cfg +2 -2
- teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
- teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
- teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
- teradataml/context/aed_context.py +217 -217
- teradataml/context/context.py +1091 -999
- teradataml/data/A_loan.csv +19 -19
- teradataml/data/BINARY_REALS_LEFT.csv +11 -11
- teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
- teradataml/data/B_loan.csv +49 -49
- teradataml/data/BuoyData2.csv +17 -17
- teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
- teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
- teradataml/data/Convolve2RealsLeft.csv +5 -5
- teradataml/data/Convolve2RealsRight.csv +5 -5
- teradataml/data/Convolve2ValidLeft.csv +11 -11
- teradataml/data/Convolve2ValidRight.csv +11 -11
- teradataml/data/DFFTConv_Real_8_8.csv +65 -65
- teradataml/data/Orders1_12mf.csv +24 -24
- teradataml/data/Pi_loan.csv +7 -7
- teradataml/data/SMOOTHED_DATA.csv +7 -7
- teradataml/data/TestDFFT8.csv +9 -9
- teradataml/data/TestRiver.csv +109 -109
- teradataml/data/Traindata.csv +28 -28
- teradataml/data/acf.csv +17 -17
- teradataml/data/adaboost_example.json +34 -34
- teradataml/data/adaboostpredict_example.json +24 -24
- teradataml/data/additional_table.csv +10 -10
- teradataml/data/admissions_test.csv +21 -21
- teradataml/data/admissions_train.csv +41 -41
- teradataml/data/admissions_train_nulls.csv +41 -41
- teradataml/data/advertising.csv +201 -0
- teradataml/data/ageandheight.csv +13 -13
- teradataml/data/ageandpressure.csv +31 -31
- teradataml/data/antiselect_example.json +36 -36
- teradataml/data/antiselect_input.csv +8 -8
- teradataml/data/antiselect_input_mixed_case.csv +8 -8
- teradataml/data/applicant_external.csv +6 -6
- teradataml/data/applicant_reference.csv +6 -6
- teradataml/data/arima_example.json +9 -9
- teradataml/data/assortedtext_input.csv +8 -8
- teradataml/data/attribution_example.json +33 -33
- teradataml/data/attribution_sample_table.csv +27 -27
- teradataml/data/attribution_sample_table1.csv +6 -6
- teradataml/data/attribution_sample_table2.csv +11 -11
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bank_web_clicks1.csv +42 -42
- teradataml/data/bank_web_clicks2.csv +91 -91
- teradataml/data/bank_web_url.csv +85 -85
- teradataml/data/barrier.csv +2 -2
- teradataml/data/barrier_new.csv +3 -3
- teradataml/data/betweenness_example.json +13 -13
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/bin_breaks.csv +8 -8
- teradataml/data/bin_fit_ip.csv +3 -3
- teradataml/data/binary_complex_left.csv +11 -11
- teradataml/data/binary_complex_right.csv +11 -11
- teradataml/data/binary_matrix_complex_left.csv +21 -21
- teradataml/data/binary_matrix_complex_right.csv +21 -21
- teradataml/data/binary_matrix_real_left.csv +21 -21
- teradataml/data/binary_matrix_real_right.csv +21 -21
- teradataml/data/blood2ageandweight.csv +26 -26
- teradataml/data/bmi.csv +501 -0
- teradataml/data/boston.csv +507 -507
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/buoydata_mix.csv +11 -11
- teradataml/data/burst_data.csv +5 -5
- teradataml/data/burst_example.json +20 -20
- teradataml/data/byom_example.json +17 -17
- teradataml/data/bytes_table.csv +3 -3
- teradataml/data/cal_housing_ex_raw.csv +70 -70
- teradataml/data/callers.csv +7 -7
- teradataml/data/calls.csv +10 -10
- teradataml/data/cars_hist.csv +33 -33
- teradataml/data/cat_table.csv +24 -24
- teradataml/data/ccm_example.json +31 -31
- teradataml/data/ccm_input.csv +91 -91
- teradataml/data/ccm_input2.csv +13 -13
- teradataml/data/ccmexample.csv +101 -101
- teradataml/data/ccmprepare_example.json +8 -8
- teradataml/data/ccmprepare_input.csv +91 -91
- teradataml/data/cfilter_example.json +12 -12
- teradataml/data/changepointdetection_example.json +18 -18
- teradataml/data/changepointdetectionrt_example.json +8 -8
- teradataml/data/chi_sq.csv +2 -2
- teradataml/data/churn_data.csv +14 -14
- teradataml/data/churn_emission.csv +35 -35
- teradataml/data/churn_initial.csv +3 -3
- teradataml/data/churn_state_transition.csv +5 -5
- teradataml/data/citedges_2.csv +745 -745
- teradataml/data/citvertices_2.csv +1210 -1210
- teradataml/data/clicks2.csv +16 -16
- teradataml/data/clickstream.csv +12 -12
- teradataml/data/clickstream1.csv +11 -11
- teradataml/data/closeness_example.json +15 -15
- teradataml/data/complaints.csv +21 -21
- teradataml/data/complaints_mini.csv +3 -3
- teradataml/data/complaints_testtoken.csv +224 -224
- teradataml/data/complaints_tokens_test.csv +353 -353
- teradataml/data/complaints_traintoken.csv +472 -472
- teradataml/data/computers_category.csv +1001 -1001
- teradataml/data/computers_test1.csv +1252 -1252
- teradataml/data/computers_train1.csv +5009 -5009
- teradataml/data/computers_train1_clustered.csv +5009 -5009
- teradataml/data/confusionmatrix_example.json +9 -9
- teradataml/data/conversion_event_table.csv +3 -3
- teradataml/data/corr_input.csv +17 -17
- teradataml/data/correlation_example.json +11 -11
- teradataml/data/coxhazardratio_example.json +39 -39
- teradataml/data/coxph_example.json +15 -15
- teradataml/data/coxsurvival_example.json +28 -28
- teradataml/data/cpt.csv +41 -41
- teradataml/data/credit_ex_merged.csv +45 -45
- teradataml/data/customer_loyalty.csv +301 -301
- teradataml/data/customer_loyalty_newseq.csv +31 -31
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +146 -146
- teradataml/data/decisionforest_example.json +37 -37
- teradataml/data/decisionforestpredict_example.json +38 -38
- teradataml/data/decisiontree_example.json +21 -21
- teradataml/data/decisiontreepredict_example.json +45 -45
- teradataml/data/dfft2_size4_real.csv +17 -17
- teradataml/data/dfft2_test_matrix16.csv +17 -17
- teradataml/data/dfft2conv_real_4_4.csv +65 -65
- teradataml/data/diabetes.csv +443 -443
- teradataml/data/diabetes_test.csv +89 -89
- teradataml/data/dict_table.csv +5 -5
- teradataml/data/docperterm_table.csv +4 -4
- teradataml/data/docs/__init__.py +1 -1
- teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
- teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
- teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
- teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
- teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
- teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
- teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
- teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
- teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
- teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
- teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
- teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
- teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
- teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
- teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
- teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
- teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
- teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
- teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
- teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
- teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
- teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
- teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
- teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
- teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
- teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
- teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
- teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
- teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
- teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
- teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
- teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
- teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
- teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
- teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
- teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
- teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
- teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
- teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
- teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
- teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
- teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
- teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
- teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
- teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
- teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
- teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
- teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
- teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
- teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
- teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
- teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
- teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
- teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
- teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
- teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
- teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
- teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
- teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
- teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
- teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
- teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
- teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
- teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
- teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
- teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
- teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
- teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
- teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
- teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
- teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
- teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
- teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
- teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
- teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
- teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
- teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
- teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
- teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
- teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
- teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
- teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
- teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
- teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
- teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
- teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
- teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
- teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
- teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
- teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
- teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
- teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
- teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
- teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
- teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
- teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
- teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
- teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
- teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
- teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
- teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
- teradataml/data/dtw_example.json +17 -17
- teradataml/data/dtw_t1.csv +11 -11
- teradataml/data/dtw_t2.csv +4 -4
- teradataml/data/dwt2d_example.json +15 -15
- teradataml/data/dwt_example.json +14 -14
- teradataml/data/dwt_filter_dim.csv +5 -5
- teradataml/data/emission.csv +9 -9
- teradataml/data/emp_table_by_dept.csv +19 -19
- teradataml/data/employee_info.csv +4 -4
- teradataml/data/employee_table.csv +6 -6
- teradataml/data/excluding_event_table.csv +2 -2
- teradataml/data/finance_data.csv +6 -6
- teradataml/data/finance_data2.csv +61 -61
- teradataml/data/finance_data3.csv +93 -93
- teradataml/data/fish.csv +160 -0
- teradataml/data/fm_blood2ageandweight.csv +26 -26
- teradataml/data/fmeasure_example.json +11 -11
- teradataml/data/followers_leaders.csv +10 -10
- teradataml/data/fpgrowth_example.json +12 -12
- teradataml/data/frequentpaths_example.json +29 -29
- teradataml/data/friends.csv +9 -9
- teradataml/data/fs_input.csv +33 -33
- teradataml/data/fs_input1.csv +33 -33
- teradataml/data/genData.csv +513 -513
- teradataml/data/geodataframe_example.json +39 -39
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/glm_admissions_model.csv +12 -12
- teradataml/data/glm_example.json +56 -29
- teradataml/data/glml1l2_example.json +28 -28
- teradataml/data/glml1l2predict_example.json +54 -54
- teradataml/data/glmpredict_example.json +54 -54
- teradataml/data/gq_t1.csv +21 -21
- teradataml/data/hconvolve_complex_right.csv +5 -5
- teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
- teradataml/data/histogram_example.json +11 -11
- teradataml/data/hmmdecoder_example.json +78 -78
- teradataml/data/hmmevaluator_example.json +24 -24
- teradataml/data/hmmsupervised_example.json +10 -10
- teradataml/data/hmmunsupervised_example.json +7 -7
- teradataml/data/house_values.csv +12 -12
- teradataml/data/house_values2.csv +13 -13
- teradataml/data/housing_cat.csv +7 -7
- teradataml/data/housing_data.csv +9 -9
- teradataml/data/housing_test.csv +47 -47
- teradataml/data/housing_test_binary.csv +47 -47
- teradataml/data/housing_train.csv +493 -493
- teradataml/data/housing_train_attribute.csv +4 -4
- teradataml/data/housing_train_binary.csv +437 -437
- teradataml/data/housing_train_parameter.csv +2 -2
- teradataml/data/housing_train_response.csv +493 -493
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/ibm_stock.csv +370 -370
- teradataml/data/ibm_stock1.csv +370 -370
- teradataml/data/identitymatch_example.json +21 -21
- teradataml/data/idf_table.csv +4 -4
- teradataml/data/impressions.csv +101 -101
- teradataml/data/inflation.csv +21 -21
- teradataml/data/initial.csv +3 -3
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/insect_sprays.csv +12 -12
- teradataml/data/insurance.csv +1339 -1339
- teradataml/data/interpolator_example.json +12 -12
- teradataml/data/iris_altinput.csv +481 -481
- teradataml/data/iris_attribute_output.csv +8 -8
- teradataml/data/iris_attribute_test.csv +121 -121
- teradataml/data/iris_attribute_train.csv +481 -481
- teradataml/data/iris_category_expect_predict.csv +31 -31
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/iris_input.csv +151 -151
- teradataml/data/iris_response_train.csv +121 -121
- teradataml/data/iris_test.csv +31 -31
- teradataml/data/iris_train.csv +121 -121
- teradataml/data/join_table1.csv +4 -4
- teradataml/data/join_table2.csv +4 -4
- teradataml/data/jsons/anly_function_name.json +6 -6
- teradataml/data/jsons/byom/dataikupredict.json +147 -147
- teradataml/data/jsons/byom/datarobotpredict.json +146 -146
- teradataml/data/jsons/byom/h2opredict.json +194 -194
- teradataml/data/jsons/byom/onnxpredict.json +186 -186
- teradataml/data/jsons/byom/pmmlpredict.json +146 -146
- teradataml/data/jsons/paired_functions.json +435 -435
- teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
- teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
- teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
- teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
- teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
- teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
- teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
- teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
- teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
- teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
- teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
- teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
- teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
- teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
- teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
- teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
- teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
- teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
- teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
- teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
- teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
- teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
- teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
- teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
- teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
- teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
- teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
- teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
- teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
- teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
- teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
- teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
- teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
- teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
- teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
- teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
- teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
- teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
- teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
- teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
- teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
- teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
- teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
- teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
- teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
- teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
- teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
- teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
- teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
- teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
- teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
- teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
- teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
- teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
- teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
- teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
- teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
- teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
- teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
- teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
- teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
- teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
- teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
- teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
- teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
- teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
- teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
- teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
- teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
- teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
- teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
- teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
- teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
- teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
- teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
- teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
- teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
- teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
- teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
- teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
- teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
- teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
- teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
- teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
- teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
- teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
- teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
- teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
- teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
- teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
- teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
- teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
- teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
- teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
- teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
- teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
- teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
- teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
- teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
- teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
- teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
- teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
- teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
- teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
- teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
- teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
- teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
- teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
- teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
- teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
- teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
- teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
- teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
- teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
- teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
- teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
- teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
- teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
- teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
- teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
- teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
- teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
- teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
- teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
- teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
- teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
- teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
- teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
- teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
- teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
- teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
- teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
- teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
- teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
- teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
- teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
- teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
- teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
- teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
- teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
- teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
- teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
- teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
- teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
- teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
- teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
- teradataml/data/kmeans_example.json +22 -17
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/kmeans_us_arrests_data.csv +0 -0
- teradataml/data/knn_example.json +18 -18
- teradataml/data/knnrecommender_example.json +6 -6
- teradataml/data/knnrecommenderpredict_example.json +12 -12
- teradataml/data/lar_example.json +17 -17
- teradataml/data/larpredict_example.json +30 -30
- teradataml/data/lc_new_predictors.csv +5 -5
- teradataml/data/lc_new_reference.csv +9 -9
- teradataml/data/lda_example.json +8 -8
- teradataml/data/ldainference_example.json +14 -14
- teradataml/data/ldatopicsummary_example.json +8 -8
- teradataml/data/levendist_input.csv +13 -13
- teradataml/data/levenshteindistance_example.json +10 -10
- teradataml/data/linreg_example.json +9 -9
- teradataml/data/load_example_data.py +326 -323
- teradataml/data/loan_prediction.csv +295 -295
- teradataml/data/lungcancer.csv +138 -138
- teradataml/data/mappingdata.csv +12 -12
- teradataml/data/milk_timeseries.csv +157 -157
- teradataml/data/min_max_titanic.csv +4 -4
- teradataml/data/minhash_example.json +6 -6
- teradataml/data/ml_ratings.csv +7547 -7547
- teradataml/data/ml_ratings_10.csv +2445 -2445
- teradataml/data/model1_table.csv +5 -5
- teradataml/data/model2_table.csv +5 -5
- teradataml/data/models/iris_db_glm_model.pmml +56 -56
- teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
- teradataml/data/modularity_example.json +12 -12
- teradataml/data/movavg_example.json +7 -7
- teradataml/data/mtx1.csv +7 -7
- teradataml/data/mtx2.csv +13 -13
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/mvdfft8.csv +9 -9
- teradataml/data/naivebayes_example.json +9 -9
- teradataml/data/naivebayespredict_example.json +19 -19
- teradataml/data/naivebayestextclassifier2_example.json +6 -6
- teradataml/data/naivebayestextclassifier_example.json +8 -8
- teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
- teradataml/data/name_Find_configure.csv +10 -10
- teradataml/data/namedentityfinder_example.json +14 -14
- teradataml/data/namedentityfinderevaluator_example.json +10 -10
- teradataml/data/namedentityfindertrainer_example.json +6 -6
- teradataml/data/nb_iris_input_test.csv +31 -31
- teradataml/data/nb_iris_input_train.csv +121 -121
- teradataml/data/nbp_iris_model.csv +13 -13
- teradataml/data/ner_extractor_text.csv +2 -2
- teradataml/data/ner_sports_test2.csv +29 -29
- teradataml/data/ner_sports_train.csv +501 -501
- teradataml/data/nerevaluator_example.json +5 -5
- teradataml/data/nerextractor_example.json +18 -18
- teradataml/data/nermem_sports_test.csv +17 -17
- teradataml/data/nermem_sports_train.csv +50 -50
- teradataml/data/nertrainer_example.json +6 -6
- teradataml/data/ngrams_example.json +6 -6
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
- teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
- teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
- teradataml/data/npath_example.json +23 -23
- teradataml/data/ntree_example.json +14 -14
- teradataml/data/numeric_strings.csv +4 -4
- teradataml/data/numerics.csv +4 -4
- teradataml/data/ocean_buoy.csv +17 -17
- teradataml/data/ocean_buoy2.csv +17 -17
- teradataml/data/ocean_buoys.csv +27 -27
- teradataml/data/ocean_buoys2.csv +10 -10
- teradataml/data/ocean_buoys_nonpti.csv +28 -28
- teradataml/data/ocean_buoys_seq.csv +29 -29
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +92 -0
- teradataml/data/optional_event_table.csv +4 -4
- teradataml/data/orders1.csv +11 -11
- teradataml/data/orders1_12.csv +12 -12
- teradataml/data/orders_ex.csv +4 -4
- teradataml/data/pack_example.json +8 -8
- teradataml/data/package_tracking.csv +19 -19
- teradataml/data/package_tracking_pti.csv +18 -18
- teradataml/data/pagerank_example.json +13 -13
- teradataml/data/paragraphs_input.csv +6 -6
- teradataml/data/pathanalyzer_example.json +7 -7
- teradataml/data/pathgenerator_example.json +7 -7
- teradataml/data/phrases.csv +7 -7
- teradataml/data/pivot_example.json +8 -8
- teradataml/data/pivot_input.csv +22 -22
- teradataml/data/playerRating.csv +31 -31
- teradataml/data/postagger_example.json +6 -6
- teradataml/data/posttagger_output.csv +44 -44
- teradataml/data/production_data.csv +16 -16
- teradataml/data/production_data2.csv +7 -7
- teradataml/data/randomsample_example.json +31 -31
- teradataml/data/randomwalksample_example.json +8 -8
- teradataml/data/rank_table.csv +6 -6
- teradataml/data/ref_mobile_data.csv +4 -4
- teradataml/data/ref_mobile_data_dense.csv +2 -2
- teradataml/data/ref_url.csv +17 -17
- teradataml/data/restaurant_reviews.csv +7 -7
- teradataml/data/river_data.csv +145 -145
- teradataml/data/roc_example.json +7 -7
- teradataml/data/roc_input.csv +101 -101
- teradataml/data/rule_inputs.csv +6 -6
- teradataml/data/rule_table.csv +2 -2
- teradataml/data/sales.csv +7 -7
- teradataml/data/sales_transaction.csv +501 -501
- teradataml/data/salesdata.csv +342 -342
- teradataml/data/sample_cities.csv +2 -2
- teradataml/data/sample_shapes.csv +10 -10
- teradataml/data/sample_streets.csv +2 -2
- teradataml/data/sampling_example.json +15 -15
- teradataml/data/sax_example.json +8 -8
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +74 -23
- teradataml/data/scale_housing.csv +11 -11
- teradataml/data/scale_housing_test.csv +6 -6
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scale_stat.csv +11 -11
- teradataml/data/scalebypartition_example.json +13 -13
- teradataml/data/scalemap_example.json +13 -13
- teradataml/data/scalesummary_example.json +12 -12
- teradataml/data/score_category.csv +101 -101
- teradataml/data/score_summary.csv +4 -4
- teradataml/data/script_example.json +9 -9
- teradataml/data/scripts/deploy_script.py +84 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/mapper.py +15 -15
- teradataml/data/scripts/mapper_replace.py +15 -15
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
- teradataml/data/seeds.csv +10 -10
- teradataml/data/sentenceextractor_example.json +6 -6
- teradataml/data/sentiment_extract_input.csv +11 -11
- teradataml/data/sentiment_train.csv +16 -16
- teradataml/data/sentiment_word.csv +20 -20
- teradataml/data/sentiment_word_input.csv +19 -19
- teradataml/data/sentimentextractor_example.json +24 -24
- teradataml/data/sentimenttrainer_example.json +8 -8
- teradataml/data/sequence_table.csv +10 -10
- teradataml/data/seriessplitter_example.json +7 -7
- teradataml/data/sessionize_example.json +17 -17
- teradataml/data/sessionize_table.csv +116 -116
- teradataml/data/setop_test1.csv +24 -24
- teradataml/data/setop_test2.csv +22 -22
- teradataml/data/soc_nw_edges.csv +10 -10
- teradataml/data/soc_nw_vertices.csv +7 -7
- teradataml/data/souvenir_timeseries.csv +167 -167
- teradataml/data/sparse_iris_attribute.csv +5 -5
- teradataml/data/sparse_iris_test.csv +121 -121
- teradataml/data/sparse_iris_train.csv +601 -601
- teradataml/data/star1.csv +6 -6
- teradataml/data/state_transition.csv +5 -5
- teradataml/data/stock_data.csv +53 -53
- teradataml/data/stock_movement.csv +11 -11
- teradataml/data/stock_vol.csv +76 -76
- teradataml/data/stop_words.csv +8 -8
- teradataml/data/store_sales.csv +37 -37
- teradataml/data/stringsimilarity_example.json +7 -7
- teradataml/data/strsimilarity_input.csv +13 -13
- teradataml/data/students.csv +101 -101
- teradataml/data/svm_iris_input_test.csv +121 -121
- teradataml/data/svm_iris_input_train.csv +481 -481
- teradataml/data/svm_iris_model.csv +7 -7
- teradataml/data/svmdense_example.json +9 -9
- teradataml/data/svmdensepredict_example.json +18 -18
- teradataml/data/svmsparse_example.json +7 -7
- teradataml/data/svmsparsepredict_example.json +13 -13
- teradataml/data/svmsparsesummary_example.json +7 -7
- teradataml/data/target_mobile_data.csv +13 -13
- teradataml/data/target_mobile_data_dense.csv +5 -5
- teradataml/data/templatedata.csv +1201 -1201
- teradataml/data/templates/open_source_ml.json +9 -0
- teradataml/data/teradataml_example.json +150 -1
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_loan_prediction.csv +53 -53
- teradataml/data/test_pacf_12.csv +37 -37
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/test_river2.csv +109 -109
- teradataml/data/text_inputs.csv +6 -6
- teradataml/data/textchunker_example.json +7 -7
- teradataml/data/textclassifier_example.json +6 -6
- teradataml/data/textclassifier_input.csv +7 -7
- teradataml/data/textclassifiertrainer_example.json +6 -6
- teradataml/data/textmorph_example.json +5 -5
- teradataml/data/textparser_example.json +15 -15
- teradataml/data/texttagger_example.json +11 -11
- teradataml/data/texttokenizer_example.json +6 -6
- teradataml/data/texttrainer_input.csv +11 -11
- teradataml/data/tf_example.json +6 -6
- teradataml/data/tfidf_example.json +13 -13
- teradataml/data/tfidf_input1.csv +201 -201
- teradataml/data/tfidf_train.csv +6 -6
- teradataml/data/time_table1.csv +535 -535
- teradataml/data/time_table2.csv +14 -14
- teradataml/data/timeseriesdata.csv +1601 -1601
- teradataml/data/timeseriesdatasetsd4.csv +105 -105
- teradataml/data/titanic.csv +892 -892
- teradataml/data/token_table.csv +696 -696
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/train_tracking.csv +27 -27
- teradataml/data/transformation_table.csv +5 -5
- teradataml/data/transformation_table_new.csv +1 -1
- teradataml/data/tv_spots.csv +16 -16
- teradataml/data/twod_climate_data.csv +117 -117
- teradataml/data/uaf_example.json +475 -475
- teradataml/data/univariatestatistics_example.json +8 -8
- teradataml/data/unpack_example.json +9 -9
- teradataml/data/unpivot_example.json +9 -9
- teradataml/data/unpivot_input.csv +8 -8
- teradataml/data/us_air_pass.csv +36 -36
- teradataml/data/us_population.csv +624 -624
- teradataml/data/us_states_shapes.csv +52 -52
- teradataml/data/varmax_example.json +17 -17
- teradataml/data/vectordistance_example.json +25 -25
- teradataml/data/ville_climatedata.csv +121 -121
- teradataml/data/ville_tempdata.csv +12 -12
- teradataml/data/ville_tempdata1.csv +12 -12
- teradataml/data/ville_temperature.csv +11 -11
- teradataml/data/waveletTable.csv +1605 -1605
- teradataml/data/waveletTable2.csv +1605 -1605
- teradataml/data/weightedmovavg_example.json +8 -8
- teradataml/data/wft_testing.csv +5 -5
- teradataml/data/wine_data.csv +1600 -0
- teradataml/data/word_embed_input_table1.csv +5 -5
- teradataml/data/word_embed_input_table2.csv +4 -4
- teradataml/data/word_embed_model.csv +22 -22
- teradataml/data/words_input.csv +13 -13
- teradataml/data/xconvolve_complex_left.csv +6 -6
- teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
- teradataml/data/xgboost_example.json +35 -35
- teradataml/data/xgboostpredict_example.json +31 -31
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +1769 -1698
- teradataml/dataframe/data_transfer.py +2812 -2745
- teradataml/dataframe/dataframe.py +17630 -16946
- teradataml/dataframe/dataframe_utils.py +1875 -1740
- teradataml/dataframe/fastload.py +794 -603
- teradataml/dataframe/indexer.py +424 -424
- teradataml/dataframe/setop.py +1179 -1166
- teradataml/dataframe/sql.py +10174 -6432
- teradataml/dataframe/sql_function_parameters.py +439 -388
- teradataml/dataframe/sql_functions.py +652 -652
- teradataml/dataframe/sql_interfaces.py +220 -220
- teradataml/dataframe/vantage_function_types.py +674 -630
- teradataml/dataframe/window.py +693 -692
- teradataml/dbutils/__init__.py +3 -3
- teradataml/dbutils/dbutils.py +1167 -1150
- teradataml/dbutils/filemgr.py +267 -267
- teradataml/gen_ai/__init__.py +2 -2
- teradataml/gen_ai/convAI.py +472 -472
- teradataml/geospatial/__init__.py +3 -3
- teradataml/geospatial/geodataframe.py +1105 -1094
- teradataml/geospatial/geodataframecolumn.py +392 -387
- teradataml/geospatial/geometry_types.py +925 -925
- teradataml/hyperparameter_tuner/__init__.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
- teradataml/hyperparameter_tuner/utils.py +281 -187
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/sklearn/__init__.py +1 -0
- teradataml/opensource/sklearn/_class.py +255 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
- teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
- teradataml/opensource/sklearn/constants.py +54 -0
- teradataml/options/__init__.py +130 -124
- teradataml/options/configure.py +358 -336
- teradataml/options/display.py +176 -176
- teradataml/plot/__init__.py +2 -2
- teradataml/plot/axis.py +1388 -1388
- teradataml/plot/constants.py +15 -15
- teradataml/plot/figure.py +398 -398
- teradataml/plot/plot.py +760 -760
- teradataml/plot/query_generator.py +83 -83
- teradataml/plot/subplot.py +216 -216
- teradataml/scriptmgmt/UserEnv.py +3791 -3761
- teradataml/scriptmgmt/__init__.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +1719 -1604
- teradataml/series/series.py +532 -532
- teradataml/series/series_utils.py +71 -71
- teradataml/table_operators/Apply.py +949 -917
- teradataml/table_operators/Script.py +1718 -1982
- teradataml/table_operators/TableOperator.py +1255 -1616
- teradataml/table_operators/__init__.py +2 -3
- teradataml/table_operators/apply_query_generator.py +262 -262
- teradataml/table_operators/query_generator.py +507 -507
- teradataml/table_operators/table_operator_query_generator.py +460 -460
- teradataml/table_operators/table_operator_util.py +631 -639
- teradataml/table_operators/templates/dataframe_apply.template +184 -184
- teradataml/table_operators/templates/dataframe_map.template +176 -176
- teradataml/table_operators/templates/script_executor.template +170 -170
- teradataml/utils/dtypes.py +684 -684
- teradataml/utils/internal_buffer.py +84 -84
- teradataml/utils/print_versions.py +205 -205
- teradataml/utils/utils.py +410 -410
- teradataml/utils/validators.py +2277 -2115
- {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
- teradataml-20.0.0.1.dist-info/RECORD +1056 -0
- {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
- {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
- teradataml/analytics/mle/AdaBoost.py +0 -651
- teradataml/analytics/mle/AdaBoostPredict.py +0 -564
- teradataml/analytics/mle/Antiselect.py +0 -342
- teradataml/analytics/mle/Arima.py +0 -641
- teradataml/analytics/mle/ArimaPredict.py +0 -477
- teradataml/analytics/mle/Attribution.py +0 -1070
- teradataml/analytics/mle/Betweenness.py +0 -658
- teradataml/analytics/mle/Burst.py +0 -711
- teradataml/analytics/mle/CCM.py +0 -600
- teradataml/analytics/mle/CCMPrepare.py +0 -324
- teradataml/analytics/mle/CFilter.py +0 -460
- teradataml/analytics/mle/ChangePointDetection.py +0 -572
- teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
- teradataml/analytics/mle/Closeness.py +0 -737
- teradataml/analytics/mle/ConfusionMatrix.py +0 -420
- teradataml/analytics/mle/Correlation.py +0 -477
- teradataml/analytics/mle/Correlation2.py +0 -573
- teradataml/analytics/mle/CoxHazardRatio.py +0 -679
- teradataml/analytics/mle/CoxPH.py +0 -556
- teradataml/analytics/mle/CoxSurvival.py +0 -478
- teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
- teradataml/analytics/mle/DTW.py +0 -623
- teradataml/analytics/mle/DWT.py +0 -564
- teradataml/analytics/mle/DWT2D.py +0 -599
- teradataml/analytics/mle/DecisionForest.py +0 -716
- teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
- teradataml/analytics/mle/DecisionForestPredict.py +0 -561
- teradataml/analytics/mle/DecisionTree.py +0 -830
- teradataml/analytics/mle/DecisionTreePredict.py +0 -528
- teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
- teradataml/analytics/mle/FMeasure.py +0 -402
- teradataml/analytics/mle/FPGrowth.py +0 -734
- teradataml/analytics/mle/FrequentPaths.py +0 -695
- teradataml/analytics/mle/GLM.py +0 -558
- teradataml/analytics/mle/GLML1L2.py +0 -547
- teradataml/analytics/mle/GLML1L2Predict.py +0 -519
- teradataml/analytics/mle/GLMPredict.py +0 -529
- teradataml/analytics/mle/HMMDecoder.py +0 -945
- teradataml/analytics/mle/HMMEvaluator.py +0 -901
- teradataml/analytics/mle/HMMSupervised.py +0 -521
- teradataml/analytics/mle/HMMUnsupervised.py +0 -572
- teradataml/analytics/mle/Histogram.py +0 -561
- teradataml/analytics/mle/IDWT.py +0 -476
- teradataml/analytics/mle/IDWT2D.py +0 -493
- teradataml/analytics/mle/IdentityMatch.py +0 -763
- teradataml/analytics/mle/Interpolator.py +0 -918
- teradataml/analytics/mle/KMeans.py +0 -485
- teradataml/analytics/mle/KNN.py +0 -627
- teradataml/analytics/mle/KNNRecommender.py +0 -488
- teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
- teradataml/analytics/mle/LAR.py +0 -439
- teradataml/analytics/mle/LARPredict.py +0 -478
- teradataml/analytics/mle/LDA.py +0 -548
- teradataml/analytics/mle/LDAInference.py +0 -492
- teradataml/analytics/mle/LDATopicSummary.py +0 -464
- teradataml/analytics/mle/LevenshteinDistance.py +0 -450
- teradataml/analytics/mle/LinReg.py +0 -433
- teradataml/analytics/mle/LinRegPredict.py +0 -438
- teradataml/analytics/mle/MinHash.py +0 -544
- teradataml/analytics/mle/Modularity.py +0 -587
- teradataml/analytics/mle/NEREvaluator.py +0 -410
- teradataml/analytics/mle/NERExtractor.py +0 -595
- teradataml/analytics/mle/NERTrainer.py +0 -458
- teradataml/analytics/mle/NGrams.py +0 -570
- teradataml/analytics/mle/NPath.py +0 -634
- teradataml/analytics/mle/NTree.py +0 -549
- teradataml/analytics/mle/NaiveBayes.py +0 -462
- teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
- teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
- teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
- teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
- teradataml/analytics/mle/NamedEntityFinder.py +0 -529
- teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
- teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
- teradataml/analytics/mle/POSTagger.py +0 -417
- teradataml/analytics/mle/Pack.py +0 -411
- teradataml/analytics/mle/PageRank.py +0 -535
- teradataml/analytics/mle/PathAnalyzer.py +0 -426
- teradataml/analytics/mle/PathGenerator.py +0 -367
- teradataml/analytics/mle/PathStart.py +0 -464
- teradataml/analytics/mle/PathSummarizer.py +0 -470
- teradataml/analytics/mle/Pivot.py +0 -471
- teradataml/analytics/mle/ROC.py +0 -425
- teradataml/analytics/mle/RandomSample.py +0 -637
- teradataml/analytics/mle/RandomWalkSample.py +0 -490
- teradataml/analytics/mle/SAX.py +0 -779
- teradataml/analytics/mle/SVMDense.py +0 -677
- teradataml/analytics/mle/SVMDensePredict.py +0 -536
- teradataml/analytics/mle/SVMDenseSummary.py +0 -437
- teradataml/analytics/mle/SVMSparse.py +0 -557
- teradataml/analytics/mle/SVMSparsePredict.py +0 -553
- teradataml/analytics/mle/SVMSparseSummary.py +0 -435
- teradataml/analytics/mle/Sampling.py +0 -549
- teradataml/analytics/mle/Scale.py +0 -565
- teradataml/analytics/mle/ScaleByPartition.py +0 -496
- teradataml/analytics/mle/ScaleMap.py +0 -378
- teradataml/analytics/mle/ScaleSummary.py +0 -320
- teradataml/analytics/mle/SentenceExtractor.py +0 -363
- teradataml/analytics/mle/SentimentEvaluator.py +0 -432
- teradataml/analytics/mle/SentimentExtractor.py +0 -578
- teradataml/analytics/mle/SentimentTrainer.py +0 -405
- teradataml/analytics/mle/SeriesSplitter.py +0 -641
- teradataml/analytics/mle/Sessionize.py +0 -475
- teradataml/analytics/mle/SimpleMovAvg.py +0 -397
- teradataml/analytics/mle/StringSimilarity.py +0 -425
- teradataml/analytics/mle/TF.py +0 -389
- teradataml/analytics/mle/TFIDF.py +0 -504
- teradataml/analytics/mle/TextChunker.py +0 -414
- teradataml/analytics/mle/TextClassifier.py +0 -399
- teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
- teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
- teradataml/analytics/mle/TextMorph.py +0 -494
- teradataml/analytics/mle/TextParser.py +0 -623
- teradataml/analytics/mle/TextTagger.py +0 -530
- teradataml/analytics/mle/TextTokenizer.py +0 -502
- teradataml/analytics/mle/UnivariateStatistics.py +0 -488
- teradataml/analytics/mle/Unpack.py +0 -526
- teradataml/analytics/mle/Unpivot.py +0 -438
- teradataml/analytics/mle/VarMax.py +0 -776
- teradataml/analytics/mle/VectorDistance.py +0 -762
- teradataml/analytics/mle/WeightedMovAvg.py +0 -400
- teradataml/analytics/mle/XGBoost.py +0 -842
- teradataml/analytics/mle/XGBoostPredict.py +0 -627
- teradataml/analytics/mle/__init__.py +0 -123
- teradataml/analytics/mle/json/adaboost_mle.json +0 -135
- teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
- teradataml/analytics/mle/json/antiselect_mle.json +0 -34
- teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
- teradataml/analytics/mle/json/arima_mle.json +0 -172
- teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
- teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
- teradataml/analytics/mle/json/betweenness_mle.json +0 -97
- teradataml/analytics/mle/json/burst_mle.json +0 -140
- teradataml/analytics/mle/json/ccm_mle.json +0 -124
- teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
- teradataml/analytics/mle/json/cfilter_mle.json +0 -93
- teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
- teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
- teradataml/analytics/mle/json/closeness_mle.json +0 -104
- teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
- teradataml/analytics/mle/json/correlation_mle.json +0 -86
- teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
- teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
- teradataml/analytics/mle/json/coxph_mle.json +0 -98
- teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
- teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
- teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
- teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
- teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
- teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
- teradataml/analytics/mle/json/dtw_mle.json +0 -97
- teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
- teradataml/analytics/mle/json/dwt_mle.json +0 -101
- teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
- teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
- teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
- teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
- teradataml/analytics/mle/json/glm_mle.json +0 -111
- teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
- teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
- teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/histogram_mle.json +0 -100
- teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
- teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
- teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
- teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
- teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
- teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
- teradataml/analytics/mle/json/idwt_mle.json +0 -66
- teradataml/analytics/mle/json/interpolator_mle.json +0 -151
- teradataml/analytics/mle/json/kmeans_mle.json +0 -97
- teradataml/analytics/mle/json/knn_mle.json +0 -141
- teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
- teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
- teradataml/analytics/mle/json/lar_mle.json +0 -78
- teradataml/analytics/mle/json/larpredict_mle.json +0 -69
- teradataml/analytics/mle/json/lda_mle.json +0 -130
- teradataml/analytics/mle/json/ldainference_mle.json +0 -78
- teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
- teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
- teradataml/analytics/mle/json/linreg_mle.json +0 -42
- teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
- teradataml/analytics/mle/json/minhash_mle.json +0 -113
- teradataml/analytics/mle/json/modularity_mle.json +0 -91
- teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
- teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
- teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
- teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
- teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
- teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
- teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
- teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
- teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
- teradataml/analytics/mle/json/ngrams_mle.json +0 -137
- teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
- teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
- teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
- teradataml/analytics/mle/json/pack_mle.json +0 -58
- teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
- teradataml/analytics/mle/json/pagerank_mle.json +0 -81
- teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
- teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
- teradataml/analytics/mle/json/pathstart_mle.json +0 -62
- teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
- teradataml/analytics/mle/json/pivoting_mle.json +0 -71
- teradataml/analytics/mle/json/postagger_mle.json +0 -51
- teradataml/analytics/mle/json/randomsample_mle.json +0 -131
- teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
- teradataml/analytics/mle/json/roc_mle.json +0 -73
- teradataml/analytics/mle/json/sampling_mle.json +0 -75
- teradataml/analytics/mle/json/sax_mle.json +0 -154
- teradataml/analytics/mle/json/scale_mle.json +0 -93
- teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
- teradataml/analytics/mle/json/scalemap_mle.json +0 -44
- teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
- teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
- teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
- teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
- teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
- teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
- teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
- teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
- teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
- teradataml/analytics/mle/json/svmdense_mle.json +0 -165
- teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
- teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
- teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
- teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
- teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
- teradataml/analytics/mle/json/textchunker_mle.json +0 -40
- teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
- teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
- teradataml/analytics/mle/json/textmorph_mle.json +0 -63
- teradataml/analytics/mle/json/textparser_mle.json +0 -166
- teradataml/analytics/mle/json/texttagger_mle.json +0 -81
- teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
- teradataml/analytics/mle/json/tf_mle.json +0 -33
- teradataml/analytics/mle/json/tfidf_mle.json +0 -34
- teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
- teradataml/analytics/mle/json/unpack_mle.json +0 -91
- teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
- teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
- teradataml/analytics/mle/json/varmax_mle.json +0 -176
- teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
- teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
- teradataml/analytics/mle/json/xgboost_mle.json +0 -178
- teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
- teradataml/analytics/sqle/Antiselect.py +0 -321
- teradataml/analytics/sqle/Attribution.py +0 -603
- teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
- teradataml/analytics/sqle/GLMPredict.py +0 -430
- teradataml/analytics/sqle/MovingAverage.py +0 -543
- teradataml/analytics/sqle/NGramSplitter.py +0 -548
- teradataml/analytics/sqle/NPath.py +0 -632
- teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
- teradataml/analytics/sqle/Pack.py +0 -388
- teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
- teradataml/analytics/sqle/Sessionize.py +0 -390
- teradataml/analytics/sqle/StringSimilarity.py +0 -400
- teradataml/analytics/sqle/Unpack.py +0 -503
- teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
- teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
- teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
- teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
- teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
- teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
- teradataml/analytics/sqle/json/npath_sqle.json +0 -67
- teradataml/analytics/sqle/json/pack_sqle.json +0 -47
- teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
- teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
- teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
- teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
- teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
- teradataml/catalog/model_cataloging.py +0 -980
- teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
- teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
- teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
- teradataml/table_operators/sandbox_container_util.py +0 -643
- teradataml-17.20.0.7.dist-info/RECORD +0 -1280
- {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb
CHANGED
|
@@ -1,1455 +1,1455 @@
|
|
|
1
|
-
{
|
|
2
|
-
"cells": [
|
|
3
|
-
{
|
|
4
|
-
"cell_type": "markdown",
|
|
5
|
-
"metadata": {},
|
|
6
|
-
"source": [
|
|
7
|
-
"### Disclaimer\n",
|
|
8
|
-
"Please note, the Vantage Functions via SQLAlchemy feature is a preview/beta code release with limited functionality (the “Code”). As such, you acknowledge that the Code is experimental in nature and that the Code is provided “AS IS” and may not be functional on any machine or in any environment. TERADATA DISCLAIMS ALL WARRANTIES RELATING TO THE CODE, EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES AGAINST INFRINGEMENT OF THIRD-PARTY RIGHTS, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.\n",
|
|
9
|
-
"\n",
|
|
10
|
-
"TERADATA SHALL NOT BE RESPONSIBLE OR LIABLE WITH RESPECT TO ANY SUBJECT MATTER OF THE CODE UNDER ANY CONTRACT, NEGLIGENCE, STRICT LIABILITY OR OTHER THEORY \n",
|
|
11
|
-
" (A) FOR LOSS OR INACCURACY OF DATA OR COST OF PROCUREMENT OF SUBSTITUTE GOODS, SERVICES OR TECHNOLOGY, OR \n",
|
|
12
|
-
" (B) FOR ANY INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO LOSS OF REVENUES AND LOSS OF PROFITS. TERADATA SHALL NOT BE RESPONSIBLE FOR ANY MATTER BEYOND ITS REASONABLE CONTROL.\n",
|
|
13
|
-
"\n",
|
|
14
|
-
"Notwithstanding anything to the contrary: \n",
|
|
15
|
-
" (a) Teradata will have no obligation of any kind with respect to any Code-related comments, suggestions, design changes or improvements that you elect to provide to Teradata in either verbal or written form (collectively, “Feedback”), and \n",
|
|
16
|
-
" (b) Teradata and its affiliates are hereby free to use any ideas, concepts, know-how or techniques, in whole or in part, contained in Feedback: \n",
|
|
17
|
-
" (i) for any purpose whatsoever, including developing, manufacturing, and/or marketing products and/or services incorporating Feedback in whole or in part, and \n",
|
|
18
|
-
" (ii) without any restrictions or limitations, including requiring the payment of any license fees, royalties, or other consideration. "
|
|
19
|
-
]
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"cell_type": "code",
|
|
23
|
-
"execution_count": 1,
|
|
24
|
-
"metadata": {},
|
|
25
|
-
"outputs": [],
|
|
26
|
-
"source": [
|
|
27
|
-
"# In this notebook, we will be covering examples for following Regular Aggregate Functions\n",
|
|
28
|
-
"# SQL Documentation: https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/c2fX4dzxCcDJFKqXbyQtTA\n",
|
|
29
|
-
" # 1. avg/average/ave\n",
|
|
30
|
-
" # 2. corr\n",
|
|
31
|
-
" # 3. count\n",
|
|
32
|
-
" # 4. covar_pop\n",
|
|
33
|
-
" # 5. covar_samp\n",
|
|
34
|
-
" # 6. var_pop\n",
|
|
35
|
-
" # 7. var_samp\n",
|
|
36
|
-
" # 8. kurtosis\n",
|
|
37
|
-
" # 9. max\n",
|
|
38
|
-
" # 10. REGR_AVGX\n",
|
|
39
|
-
" # 11. REGR_AVGY\n",
|
|
40
|
-
" # 12. REGR_Intercept\n",
|
|
41
|
-
" # 13. REGR_SLOPE\n",
|
|
42
|
-
" # 14. REGR_R2\n",
|
|
43
|
-
" # 15. REGR_SXX\n",
|
|
44
|
-
" # 16. REGR_SXY\n",
|
|
45
|
-
" # 17. REGR_SYY\n",
|
|
46
|
-
" # 18. min\n",
|
|
47
|
-
" # 19. skew\n",
|
|
48
|
-
" # 20. stddev_pop\n",
|
|
49
|
-
" # 21. stddev_samp\n",
|
|
50
|
-
" # 22. sum"
|
|
51
|
-
]
|
|
52
|
-
},
|
|
53
|
-
{
|
|
54
|
-
"cell_type": "code",
|
|
55
|
-
"execution_count": 2,
|
|
56
|
-
"metadata": {},
|
|
57
|
-
"outputs": [
|
|
58
|
-
{
|
|
59
|
-
"name": "stdout",
|
|
60
|
-
"output_type": "stream",
|
|
61
|
-
"text": [
|
|
62
|
-
"Hostname: ········\n",
|
|
63
|
-
"Username: ········\n",
|
|
64
|
-
"Password: ········\n",
|
|
65
|
-
"WARNING: Skipped loading table admissions_train since it already exists in the database.\n"
|
|
66
|
-
]
|
|
67
|
-
}
|
|
68
|
-
],
|
|
69
|
-
"source": [
|
|
70
|
-
"# Get the connection to the Vantage using create_context()\n",
|
|
71
|
-
"from teradataml import *\n",
|
|
72
|
-
"import getpass\n",
|
|
73
|
-
"td_context = create_context(host=getpass.getpass(\"Hostname: \"), username=getpass.getpass(\"Username: \"), password=getpass.getpass(\"Password: \"))\n",
|
|
74
|
-
"# Load the example dataset.\n",
|
|
75
|
-
"load_example_data(\"GLM\", [\"admissions_train\"])"
|
|
76
|
-
]
|
|
77
|
-
},
|
|
78
|
-
{
|
|
79
|
-
"cell_type": "code",
|
|
80
|
-
"execution_count": 3,
|
|
81
|
-
"metadata": {},
|
|
82
|
-
"outputs": [
|
|
83
|
-
{
|
|
84
|
-
"data": {
|
|
85
|
-
"text/plain": [
|
|
86
|
-
" masters gpa stats programming admitted\n",
|
|
87
|
-
"id \n",
|
|
88
|
-
"15 yes 4.00 Advanced Advanced 1\n",
|
|
89
|
-
"7 yes 2.33 Novice Novice 1\n",
|
|
90
|
-
"22 yes 3.46 Novice Beginner 0\n",
|
|
91
|
-
"17 no 3.83 Advanced Advanced 1\n",
|
|
92
|
-
"13 no 4.00 Advanced Novice 1\n",
|
|
93
|
-
"38 yes 2.65 Advanced Beginner 1\n",
|
|
94
|
-
"26 yes 3.57 Advanced Advanced 1\n",
|
|
95
|
-
"5 no 3.44 Novice Novice 0\n",
|
|
96
|
-
"34 yes 3.85 Advanced Beginner 0\n",
|
|
97
|
-
"40 yes 3.95 Novice Beginner 0"
|
|
98
|
-
]
|
|
99
|
-
},
|
|
100
|
-
"execution_count": 3,
|
|
101
|
-
"metadata": {},
|
|
102
|
-
"output_type": "execute_result"
|
|
103
|
-
}
|
|
104
|
-
],
|
|
105
|
-
"source": [
|
|
106
|
-
"# Create the DataFrame on 'admissions_train' table\n",
|
|
107
|
-
"admissions_train = DataFrame(\"admissions_train\")\n",
|
|
108
|
-
"admissions_train"
|
|
109
|
-
]
|
|
110
|
-
},
|
|
111
|
-
{
|
|
112
|
-
"cell_type": "code",
|
|
113
|
-
"execution_count": 4,
|
|
114
|
-
"metadata": {},
|
|
115
|
-
"outputs": [],
|
|
116
|
-
"source": [
|
|
117
|
-
"def print_variables(df, columns):\n",
|
|
118
|
-
" print(\"Equivalent SQL: {}\".format(df.show_query()))\n",
|
|
119
|
-
" print(\"\\n\")\n",
|
|
120
|
-
" print(\" ************************* DataFrame ********************* \")\n",
|
|
121
|
-
" print(df)\n",
|
|
122
|
-
" print(\"\\n\\n\")\n",
|
|
123
|
-
" print(\" ************************* DataFrame.dtypes ********************* \")\n",
|
|
124
|
-
" print(df.dtypes)\n",
|
|
125
|
-
" print(\"\\n\\n\")\n",
|
|
126
|
-
" if isinstance(columns, str):\n",
|
|
127
|
-
" columns = [columns]\n",
|
|
128
|
-
" for col in columns:\n",
|
|
129
|
-
" coltype = df.__getattr__(col).type\n",
|
|
130
|
-
" if isinstance(coltype, sqlalchemy.sql.sqltypes.NullType):\n",
|
|
131
|
-
" coltype = \"NullType\"\n",
|
|
132
|
-
" print(\" '{}' Column Type: {}\".format(col, coltype))"
|
|
133
|
-
]
|
|
134
|
-
},
|
|
135
|
-
{
|
|
136
|
-
"cell_type": "markdown",
|
|
137
|
-
"metadata": {},
|
|
138
|
-
"source": [
|
|
139
|
-
"# Using Aggregate Functions from Teradata Vanatge with SQLAlchemy"
|
|
140
|
-
]
|
|
141
|
-
},
|
|
142
|
-
{
|
|
143
|
-
"cell_type": "code",
|
|
144
|
-
"execution_count": 5,
|
|
145
|
-
"metadata": {},
|
|
146
|
-
"outputs": [],
|
|
147
|
-
"source": [
|
|
148
|
-
"# Import func from SQLAlchemy to use the same for executing aggregate functions\n",
|
|
149
|
-
"from sqlalchemy import func"
|
|
150
|
-
]
|
|
151
|
-
},
|
|
152
|
-
{
|
|
153
|
-
"cell_type": "code",
|
|
154
|
-
"execution_count": 6,
|
|
155
|
-
"metadata": {},
|
|
156
|
-
"outputs": [],
|
|
157
|
-
"source": [
|
|
158
|
-
"# Before we move on with examples, one should read below just to understand how teradataml DataFrame and \n",
|
|
159
|
-
"# it's columns are used to create a SQLAlchemy ClauseElement/Expression.\n",
|
|
160
|
-
"\n",
|
|
161
|
-
"# Often in below examples one would see something like this: 'admissions_train.admitted.expression'\n",
|
|
162
|
-
"# Here in the above expression,\n",
|
|
163
|
-
"# 'admissions_train' is 'teradataml DataFrame'\n",
|
|
164
|
-
"# 'admitted' is 'column name' in teradataml DataFrame 'admissions_train'\n",
|
|
165
|
-
"# Thus, \n",
|
|
166
|
-
"# 'admissions_train.admitted' together forms a ColumnExpression.\n",
|
|
167
|
-
"# expression allows us to use teradata ColumnExpression to be treated as SQLAlchemy Expression.\n",
|
|
168
|
-
"# Thus,\n",
|
|
169
|
-
"# 'admissions_train.admitted.expression' gives us an expression that can be used with SQLAlchemy clauseElements."
|
|
170
|
-
]
|
|
171
|
-
},
|
|
172
|
-
{
|
|
173
|
-
"cell_type": "markdown",
|
|
174
|
-
"metadata": {},
|
|
175
|
-
"source": [
|
|
176
|
-
"## Avg/Average/Ave Function"
|
|
177
|
-
]
|
|
178
|
-
},
|
|
179
|
-
{
|
|
180
|
-
"cell_type": "code",
|
|
181
|
-
"execution_count": 7,
|
|
182
|
-
"metadata": {},
|
|
183
|
-
"outputs": [],
|
|
184
|
-
"source": [
|
|
185
|
-
"# Function returns the arithmetic average of all values in value_expression.\n",
|
|
186
|
-
"# Syntax:\n",
|
|
187
|
-
"# Avg(value_expression)"
|
|
188
|
-
]
|
|
189
|
-
},
|
|
190
|
-
{
|
|
191
|
-
"cell_type": "code",
|
|
192
|
-
"execution_count": 8,
|
|
193
|
-
"metadata": {},
|
|
194
|
-
"outputs": [
|
|
195
|
-
{
|
|
196
|
-
"data": {
|
|
197
|
-
"text/plain": [
|
|
198
|
-
"sqlalchemy.sql.functions.Function"
|
|
199
|
-
]
|
|
200
|
-
},
|
|
201
|
-
"execution_count": 8,
|
|
202
|
-
"metadata": {},
|
|
203
|
-
"output_type": "execute_result"
|
|
204
|
-
}
|
|
205
|
-
],
|
|
206
|
-
"source": [
|
|
207
|
-
"agg_func_ = func.avg(admissions_train.gpa.expression)\n",
|
|
208
|
-
"type(agg_func_)"
|
|
209
|
-
]
|
|
210
|
-
},
|
|
211
|
-
{
|
|
212
|
-
"cell_type": "code",
|
|
213
|
-
"execution_count": 9,
|
|
214
|
-
"metadata": {},
|
|
215
|
-
"outputs": [
|
|
216
|
-
{
|
|
217
|
-
"name": "stdout",
|
|
218
|
-
"output_type": "stream",
|
|
219
|
-
"text": [
|
|
220
|
-
"Equivalent SQL: select ave(admitted) AS ave_admitted_, average(admitted) AS average_admitted_, avg(gpa) AS avg_gpa_ from \"admissions_train\"\n",
|
|
221
|
-
"\n",
|
|
222
|
-
"\n",
|
|
223
|
-
" ************************* DataFrame ********************* \n",
|
|
224
|
-
" ave_admitted_ average_admitted_ avg_gpa_\n",
|
|
225
|
-
"0 0.65 0.65 3.54175\n",
|
|
226
|
-
"\n",
|
|
227
|
-
"\n",
|
|
228
|
-
"\n",
|
|
229
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
230
|
-
"ave_admitted_ float\n",
|
|
231
|
-
"average_admitted_ float\n",
|
|
232
|
-
"avg_gpa_ float\n",
|
|
233
|
-
"\n",
|
|
234
|
-
"\n",
|
|
235
|
-
"\n",
|
|
236
|
-
" 'avg_gpa_' Column Type: FLOAT\n",
|
|
237
|
-
" 'average_admitted_' Column Type: FLOAT\n",
|
|
238
|
-
" 'ave_admitted_' Column Type: FLOAT\n"
|
|
239
|
-
]
|
|
240
|
-
}
|
|
241
|
-
],
|
|
242
|
-
"source": [
|
|
243
|
-
"df = admissions_train.assign(True, avg_gpa_=agg_func_, \n",
|
|
244
|
-
" average_admitted_=func.average(admissions_train.admitted.expression),\n",
|
|
245
|
-
" ave_admitted_=func.ave(admissions_train.admitted.expression))\n",
|
|
246
|
-
"print_variables(df, [\"avg_gpa_\", \"average_admitted_\", \"ave_admitted_\"])"
|
|
247
|
-
]
|
|
248
|
-
},
|
|
249
|
-
{
|
|
250
|
-
"cell_type": "markdown",
|
|
251
|
-
"metadata": {},
|
|
252
|
-
"source": [
|
|
253
|
-
"## CORR Function"
|
|
254
|
-
]
|
|
255
|
-
},
|
|
256
|
-
{
|
|
257
|
-
"cell_type": "code",
|
|
258
|
-
"execution_count": 10,
|
|
259
|
-
"metadata": {},
|
|
260
|
-
"outputs": [],
|
|
261
|
-
"source": [
|
|
262
|
-
"# Function returns the Sample Pearson product moment correlation coefficient of its arguments for all non-null data point pairs.\n",
|
|
263
|
-
"# Syntax:\n",
|
|
264
|
-
"# Corr(value_expression1, value_expression2)"
|
|
265
|
-
]
|
|
266
|
-
},
|
|
267
|
-
{
|
|
268
|
-
"cell_type": "code",
|
|
269
|
-
"execution_count": 11,
|
|
270
|
-
"metadata": {},
|
|
271
|
-
"outputs": [
|
|
272
|
-
{
|
|
273
|
-
"name": "stdout",
|
|
274
|
-
"output_type": "stream",
|
|
275
|
-
"text": [
|
|
276
|
-
"Equivalent SQL: select corr(admitted, gpa) AS corr_numeric_ from \"admissions_train\"\n",
|
|
277
|
-
"\n",
|
|
278
|
-
"\n",
|
|
279
|
-
" ************************* DataFrame ********************* \n",
|
|
280
|
-
" corr_numeric_\n",
|
|
281
|
-
"0 -0.022265\n",
|
|
282
|
-
"\n",
|
|
283
|
-
"\n",
|
|
284
|
-
"\n",
|
|
285
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
286
|
-
"corr_numeric_ float\n",
|
|
287
|
-
"\n",
|
|
288
|
-
"\n",
|
|
289
|
-
"\n",
|
|
290
|
-
" 'corr_numeric_' Column Type: FLOAT\n"
|
|
291
|
-
]
|
|
292
|
-
}
|
|
293
|
-
],
|
|
294
|
-
"source": [
|
|
295
|
-
"df = admissions_train.assign(True, \n",
|
|
296
|
-
" corr_numeric_=func.corr(admissions_train.admitted.expression, admissions_train.gpa.expression))\n",
|
|
297
|
-
"print_variables(df, [\"corr_numeric_\"])"
|
|
298
|
-
]
|
|
299
|
-
},
|
|
300
|
-
{
|
|
301
|
-
"cell_type": "markdown",
|
|
302
|
-
"metadata": {},
|
|
303
|
-
"source": [
|
|
304
|
-
"## Count Function"
|
|
305
|
-
]
|
|
306
|
-
},
|
|
307
|
-
{
|
|
308
|
-
"cell_type": "code",
|
|
309
|
-
"execution_count": 12,
|
|
310
|
-
"metadata": {},
|
|
311
|
-
"outputs": [],
|
|
312
|
-
"source": [
|
|
313
|
-
"# Function returns a column value that is the total number of qualified rows in value_expression.\n",
|
|
314
|
-
"# Syntax:\n",
|
|
315
|
-
"# Count(value_expression)"
|
|
316
|
-
]
|
|
317
|
-
},
|
|
318
|
-
{
|
|
319
|
-
"cell_type": "code",
|
|
320
|
-
"execution_count": 13,
|
|
321
|
-
"metadata": {},
|
|
322
|
-
"outputs": [
|
|
323
|
-
{
|
|
324
|
-
"name": "stdout",
|
|
325
|
-
"output_type": "stream",
|
|
326
|
-
"text": [
|
|
327
|
-
"Equivalent SQL: select count(admitted) AS assined_count_col_ from \"admissions_train\"\n",
|
|
328
|
-
"\n",
|
|
329
|
-
"\n",
|
|
330
|
-
" ************************* DataFrame ********************* \n",
|
|
331
|
-
" assined_count_col_\n",
|
|
332
|
-
"0 40\n",
|
|
333
|
-
"\n",
|
|
334
|
-
"\n",
|
|
335
|
-
"\n",
|
|
336
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
337
|
-
"assined_count_col_ int\n",
|
|
338
|
-
"\n",
|
|
339
|
-
"\n",
|
|
340
|
-
"\n",
|
|
341
|
-
" 'assined_count_col_' Column Type: INTEGER\n"
|
|
342
|
-
]
|
|
343
|
-
}
|
|
344
|
-
],
|
|
345
|
-
"source": [
|
|
346
|
-
"df = admissions_train.assign(True, assined_count_col_=func.count(admissions_train.admitted.expression))\n",
|
|
347
|
-
"print_variables(df, [\"assined_count_col_\"])"
|
|
348
|
-
]
|
|
349
|
-
},
|
|
350
|
-
{
|
|
351
|
-
"cell_type": "markdown",
|
|
352
|
-
"metadata": {},
|
|
353
|
-
"source": [
|
|
354
|
-
"## Covar_pop Function"
|
|
355
|
-
]
|
|
356
|
-
},
|
|
357
|
-
{
|
|
358
|
-
"cell_type": "code",
|
|
359
|
-
"execution_count": 14,
|
|
360
|
-
"metadata": {},
|
|
361
|
-
"outputs": [],
|
|
362
|
-
"source": [
|
|
363
|
-
"# Function returns the population covariance of its arguments for all non-null data point pairs.\n",
|
|
364
|
-
"# Syntax:\n",
|
|
365
|
-
"# Covar_pop(value_expression1, value_expression2)"
|
|
366
|
-
]
|
|
367
|
-
},
|
|
368
|
-
{
|
|
369
|
-
"cell_type": "code",
|
|
370
|
-
"execution_count": 15,
|
|
371
|
-
"metadata": {},
|
|
372
|
-
"outputs": [
|
|
373
|
-
{
|
|
374
|
-
"name": "stdout",
|
|
375
|
-
"output_type": "stream",
|
|
376
|
-
"text": [
|
|
377
|
-
"Equivalent SQL: select Covar_pop(admitted, gpa) AS \"assined_col_Covar_pop\" from \"admissions_train\"\n",
|
|
378
|
-
"\n",
|
|
379
|
-
"\n",
|
|
380
|
-
" ************************* DataFrame ********************* \n",
|
|
381
|
-
" assined_col_Covar_pop\n",
|
|
382
|
-
"0 -0.005387\n",
|
|
383
|
-
"\n",
|
|
384
|
-
"\n",
|
|
385
|
-
"\n",
|
|
386
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
387
|
-
"assined_col_Covar_pop float\n",
|
|
388
|
-
"\n",
|
|
389
|
-
"\n",
|
|
390
|
-
"\n",
|
|
391
|
-
" 'assined_col_Covar_pop' Column Type: FLOAT\n"
|
|
392
|
-
]
|
|
393
|
-
}
|
|
394
|
-
],
|
|
395
|
-
"source": [
|
|
396
|
-
"df = admissions_train.assign(True, \n",
|
|
397
|
-
" assined_col_Covar_pop=func.Covar_pop(admissions_train.admitted.expression, admissions_train.gpa.expression))\n",
|
|
398
|
-
"print_variables(df, [\"assined_col_Covar_pop\"])"
|
|
399
|
-
]
|
|
400
|
-
},
|
|
401
|
-
{
|
|
402
|
-
"cell_type": "markdown",
|
|
403
|
-
"metadata": {},
|
|
404
|
-
"source": [
|
|
405
|
-
"## Covar_samp Function"
|
|
406
|
-
]
|
|
407
|
-
},
|
|
408
|
-
{
|
|
409
|
-
"cell_type": "code",
|
|
410
|
-
"execution_count": 16,
|
|
411
|
-
"metadata": {},
|
|
412
|
-
"outputs": [],
|
|
413
|
-
"source": [
|
|
414
|
-
"# Function returns the sample covariance of its arguments for all non-null data point pairs.\n",
|
|
415
|
-
"# Syntax:\n",
|
|
416
|
-
"# Covar_samp(value_expression)"
|
|
417
|
-
]
|
|
418
|
-
},
|
|
419
|
-
{
|
|
420
|
-
"cell_type": "code",
|
|
421
|
-
"execution_count": 17,
|
|
422
|
-
"metadata": {},
|
|
423
|
-
"outputs": [
|
|
424
|
-
{
|
|
425
|
-
"name": "stdout",
|
|
426
|
-
"output_type": "stream",
|
|
427
|
-
"text": [
|
|
428
|
-
"Equivalent SQL: select Covar_samp(admitted, gpa) AS \"assined_col_Covar_samp\" from \"admissions_train\"\n",
|
|
429
|
-
"\n",
|
|
430
|
-
"\n",
|
|
431
|
-
" ************************* DataFrame ********************* \n",
|
|
432
|
-
" assined_col_Covar_samp\n",
|
|
433
|
-
"0 -0.005526\n",
|
|
434
|
-
"\n",
|
|
435
|
-
"\n",
|
|
436
|
-
"\n",
|
|
437
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
438
|
-
"assined_col_Covar_samp float\n",
|
|
439
|
-
"\n",
|
|
440
|
-
"\n",
|
|
441
|
-
"\n",
|
|
442
|
-
" 'assined_col_Covar_samp' Column Type: FLOAT\n"
|
|
443
|
-
]
|
|
444
|
-
}
|
|
445
|
-
],
|
|
446
|
-
"source": [
|
|
447
|
-
"df = admissions_train.assign(True, \n",
|
|
448
|
-
" assined_col_Covar_samp=func.Covar_samp(admissions_train.admitted.expression, admissions_train.gpa.expression))\n",
|
|
449
|
-
"print_variables(df, [\"assined_col_Covar_samp\"])"
|
|
450
|
-
]
|
|
451
|
-
},
|
|
452
|
-
{
|
|
453
|
-
"cell_type": "markdown",
|
|
454
|
-
"metadata": {},
|
|
455
|
-
"source": [
|
|
456
|
-
"## Kurtosis Function"
|
|
457
|
-
]
|
|
458
|
-
},
|
|
459
|
-
{
|
|
460
|
-
"cell_type": "code",
|
|
461
|
-
"execution_count": 18,
|
|
462
|
-
"metadata": {},
|
|
463
|
-
"outputs": [],
|
|
464
|
-
"source": [
|
|
465
|
-
"# Function returns the kurtosis of the distribution of value_expression.\n",
|
|
466
|
-
"# Syntax:\n",
|
|
467
|
-
"# Kurtosis(value_expression)"
|
|
468
|
-
]
|
|
469
|
-
},
|
|
470
|
-
{
|
|
471
|
-
"cell_type": "code",
|
|
472
|
-
"execution_count": 19,
|
|
473
|
-
"metadata": {},
|
|
474
|
-
"outputs": [
|
|
475
|
-
{
|
|
476
|
-
"name": "stdout",
|
|
477
|
-
"output_type": "stream",
|
|
478
|
-
"text": [
|
|
479
|
-
"Equivalent SQL: select Kurtosis(gpa) AS \"assined_col_Kurtosis_num\" from \"admissions_train\"\n",
|
|
480
|
-
"\n",
|
|
481
|
-
"\n",
|
|
482
|
-
" ************************* DataFrame ********************* \n",
|
|
483
|
-
" assined_col_Kurtosis_num\n",
|
|
484
|
-
"0 4.052659\n",
|
|
485
|
-
"\n",
|
|
486
|
-
"\n",
|
|
487
|
-
"\n",
|
|
488
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
489
|
-
"assined_col_Kurtosis_num float\n",
|
|
490
|
-
"\n",
|
|
491
|
-
"\n",
|
|
492
|
-
"\n",
|
|
493
|
-
" 'assined_col_Kurtosis_num' Column Type: FLOAT\n"
|
|
494
|
-
]
|
|
495
|
-
}
|
|
496
|
-
],
|
|
497
|
-
"source": [
|
|
498
|
-
"df = admissions_train.assign(True, assined_col_Kurtosis_num=func.Kurtosis(admissions_train.gpa.expression))\n",
|
|
499
|
-
"print_variables(df, [\"assined_col_Kurtosis_num\"])"
|
|
500
|
-
]
|
|
501
|
-
},
|
|
502
|
-
{
|
|
503
|
-
"cell_type": "markdown",
|
|
504
|
-
"metadata": {},
|
|
505
|
-
"source": [
|
|
506
|
-
"## max/maximum Function"
|
|
507
|
-
]
|
|
508
|
-
},
|
|
509
|
-
{
|
|
510
|
-
"cell_type": "code",
|
|
511
|
-
"execution_count": 20,
|
|
512
|
-
"metadata": {},
|
|
513
|
-
"outputs": [],
|
|
514
|
-
"source": [
|
|
515
|
-
"# Function returns a column value that is the maximum value for value_expression.\n",
|
|
516
|
-
"# Syntax:\n",
|
|
517
|
-
"# max(value_expression)"
|
|
518
|
-
]
|
|
519
|
-
},
|
|
520
|
-
{
|
|
521
|
-
"cell_type": "code",
|
|
522
|
-
"execution_count": 21,
|
|
523
|
-
"metadata": {},
|
|
524
|
-
"outputs": [
|
|
525
|
-
{
|
|
526
|
-
"name": "stdout",
|
|
527
|
-
"output_type": "stream",
|
|
528
|
-
"text": [
|
|
529
|
-
"Equivalent SQL: select max(gpa) AS assined_col_max, maximum(stats) AS assined_col_maximum from \"admissions_train\"\n",
|
|
530
|
-
"\n",
|
|
531
|
-
"\n",
|
|
532
|
-
" ************************* DataFrame ********************* \n",
|
|
533
|
-
" assined_col_max assined_col_maximum\n",
|
|
534
|
-
"0 4.0 Novice\n",
|
|
535
|
-
"\n",
|
|
536
|
-
"\n",
|
|
537
|
-
"\n",
|
|
538
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
539
|
-
"assined_col_max float\n",
|
|
540
|
-
"assined_col_maximum str\n",
|
|
541
|
-
"\n",
|
|
542
|
-
"\n",
|
|
543
|
-
"\n",
|
|
544
|
-
" 'assined_col_maximum' Column Type: VARCHAR\n",
|
|
545
|
-
" 'assined_col_max' Column Type: FLOAT\n"
|
|
546
|
-
]
|
|
547
|
-
}
|
|
548
|
-
],
|
|
549
|
-
"source": [
|
|
550
|
-
"df = admissions_train.assign(True, \n",
|
|
551
|
-
" assined_col_max=func.max(admissions_train.gpa.expression),\n",
|
|
552
|
-
" assined_col_maximum=func.maximum(admissions_train.stats.expression))\n",
|
|
553
|
-
"print_variables(df, [\"assined_col_maximum\", \"assined_col_max\"])"
|
|
554
|
-
]
|
|
555
|
-
},
|
|
556
|
-
{
|
|
557
|
-
"cell_type": "markdown",
|
|
558
|
-
"metadata": {},
|
|
559
|
-
"source": [
|
|
560
|
-
"## min/minimum Function"
|
|
561
|
-
]
|
|
562
|
-
},
|
|
563
|
-
{
|
|
564
|
-
"cell_type": "code",
|
|
565
|
-
"execution_count": 22,
|
|
566
|
-
"metadata": {},
|
|
567
|
-
"outputs": [],
|
|
568
|
-
"source": [
|
|
569
|
-
"# Function returns a column value that is the minimum value for value_expression.\n",
|
|
570
|
-
"# Syntax:\n",
|
|
571
|
-
"# min(value_expression)"
|
|
572
|
-
]
|
|
573
|
-
},
|
|
574
|
-
{
|
|
575
|
-
"cell_type": "code",
|
|
576
|
-
"execution_count": 23,
|
|
577
|
-
"metadata": {},
|
|
578
|
-
"outputs": [
|
|
579
|
-
{
|
|
580
|
-
"name": "stdout",
|
|
581
|
-
"output_type": "stream",
|
|
582
|
-
"text": [
|
|
583
|
-
"Equivalent SQL: select min(gpa) AS assined_col_min, minimum(stats) AS assined_col_minimum from \"admissions_train\"\n",
|
|
584
|
-
"\n",
|
|
585
|
-
"\n",
|
|
586
|
-
" ************************* DataFrame ********************* \n",
|
|
587
|
-
" assined_col_min assined_col_minimum\n",
|
|
588
|
-
"0 1.87 Advanced\n",
|
|
589
|
-
"\n",
|
|
590
|
-
"\n",
|
|
591
|
-
"\n",
|
|
592
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
593
|
-
"assined_col_min float\n",
|
|
594
|
-
"assined_col_minimum str\n",
|
|
595
|
-
"\n",
|
|
596
|
-
"\n",
|
|
597
|
-
"\n",
|
|
598
|
-
" 'assined_col_min' Column Type: FLOAT\n",
|
|
599
|
-
" 'assined_col_minimum' Column Type: VARCHAR\n"
|
|
600
|
-
]
|
|
601
|
-
}
|
|
602
|
-
],
|
|
603
|
-
"source": [
|
|
604
|
-
"df = admissions_train.assign(True, \n",
|
|
605
|
-
" assined_col_min=func.min(admissions_train.gpa.expression),\n",
|
|
606
|
-
" assined_col_minimum=func.minimum(admissions_train.stats.expression))\n",
|
|
607
|
-
"print_variables(df, [\"assined_col_min\", \"assined_col_minimum\"])"
|
|
608
|
-
]
|
|
609
|
-
},
|
|
610
|
-
{
|
|
611
|
-
"cell_type": "markdown",
|
|
612
|
-
"metadata": {},
|
|
613
|
-
"source": [
|
|
614
|
-
"## REGR_AVGX Function"
|
|
615
|
-
]
|
|
616
|
-
},
|
|
617
|
-
{
|
|
618
|
-
"cell_type": "code",
|
|
619
|
-
"execution_count": 24,
|
|
620
|
-
"metadata": {},
|
|
621
|
-
"outputs": [],
|
|
622
|
-
"source": [
|
|
623
|
-
"# Function returns the mean of the independent_variable_expression for all non-null data pairs of the \n",
|
|
624
|
-
"# dependent and independent variable arguments.\n",
|
|
625
|
-
"# Syntax:\n",
|
|
626
|
-
"# REGR_AVGX(dependent_value_expression, independent_value_expression)"
|
|
627
|
-
]
|
|
628
|
-
},
|
|
629
|
-
{
|
|
630
|
-
"cell_type": "code",
|
|
631
|
-
"execution_count": 25,
|
|
632
|
-
"metadata": {},
|
|
633
|
-
"outputs": [
|
|
634
|
-
{
|
|
635
|
-
"name": "stdout",
|
|
636
|
-
"output_type": "stream",
|
|
637
|
-
"text": [
|
|
638
|
-
"Equivalent SQL: select regr_avgx(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
639
|
-
"\n",
|
|
640
|
-
"\n",
|
|
641
|
-
" ************************* DataFrame ********************* \n",
|
|
642
|
-
" assined_col_\n",
|
|
643
|
-
"0 3.54175\n",
|
|
644
|
-
"\n",
|
|
645
|
-
"\n",
|
|
646
|
-
"\n",
|
|
647
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
648
|
-
"assined_col_ float\n",
|
|
649
|
-
"\n",
|
|
650
|
-
"\n",
|
|
651
|
-
"\n",
|
|
652
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
653
|
-
]
|
|
654
|
-
}
|
|
655
|
-
],
|
|
656
|
-
"source": [
|
|
657
|
-
"df = admissions_train.assign(True, \n",
|
|
658
|
-
" assined_col_=func.regr_avgx(admissions_train.admitted.expression, \n",
|
|
659
|
-
" admissions_train.gpa.expression))\n",
|
|
660
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
661
|
-
]
|
|
662
|
-
},
|
|
663
|
-
{
|
|
664
|
-
"cell_type": "markdown",
|
|
665
|
-
"metadata": {},
|
|
666
|
-
"source": [
|
|
667
|
-
"## REGR_AVGY Function"
|
|
668
|
-
]
|
|
669
|
-
},
|
|
670
|
-
{
|
|
671
|
-
"cell_type": "code",
|
|
672
|
-
"execution_count": 26,
|
|
673
|
-
"metadata": {},
|
|
674
|
-
"outputs": [],
|
|
675
|
-
"source": [
|
|
676
|
-
"# Function returns the mean of the dependent_variable_expression for all non-null data pairs of the \n",
|
|
677
|
-
"# dependent and independent variable arguments.\n",
|
|
678
|
-
"# Syntax:\n",
|
|
679
|
-
"# REGR_AVGY(dependent_value_expression, independent_value_expression)"
|
|
680
|
-
]
|
|
681
|
-
},
|
|
682
|
-
{
|
|
683
|
-
"cell_type": "code",
|
|
684
|
-
"execution_count": 27,
|
|
685
|
-
"metadata": {},
|
|
686
|
-
"outputs": [
|
|
687
|
-
{
|
|
688
|
-
"name": "stdout",
|
|
689
|
-
"output_type": "stream",
|
|
690
|
-
"text": [
|
|
691
|
-
"Equivalent SQL: select regr_avgy(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
692
|
-
"\n",
|
|
693
|
-
"\n",
|
|
694
|
-
" ************************* DataFrame ********************* \n",
|
|
695
|
-
" assined_col_\n",
|
|
696
|
-
"0 0.65\n",
|
|
697
|
-
"\n",
|
|
698
|
-
"\n",
|
|
699
|
-
"\n",
|
|
700
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
701
|
-
"assined_col_ float\n",
|
|
702
|
-
"\n",
|
|
703
|
-
"\n",
|
|
704
|
-
"\n",
|
|
705
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
706
|
-
]
|
|
707
|
-
}
|
|
708
|
-
],
|
|
709
|
-
"source": [
|
|
710
|
-
"df = admissions_train.assign(True, \n",
|
|
711
|
-
" assined_col_=func.regr_avgy(admissions_train.admitted.expression, \n",
|
|
712
|
-
" admissions_train.gpa.expression))\n",
|
|
713
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
714
|
-
]
|
|
715
|
-
},
|
|
716
|
-
{
|
|
717
|
-
"cell_type": "markdown",
|
|
718
|
-
"metadata": {},
|
|
719
|
-
"source": [
|
|
720
|
-
"## REGR_Count Function"
|
|
721
|
-
]
|
|
722
|
-
},
|
|
723
|
-
{
|
|
724
|
-
"cell_type": "code",
|
|
725
|
-
"execution_count": 28,
|
|
726
|
-
"metadata": {},
|
|
727
|
-
"outputs": [],
|
|
728
|
-
"source": [
|
|
729
|
-
"# Function returns the count of all non-null data pairs of the dependent and independent variable arguments.\n",
|
|
730
|
-
"# Syntax:\n",
|
|
731
|
-
"# REGR_count(dependent_value_expression, independent_value_expression)"
|
|
732
|
-
]
|
|
733
|
-
},
|
|
734
|
-
{
|
|
735
|
-
"cell_type": "code",
|
|
736
|
-
"execution_count": 29,
|
|
737
|
-
"metadata": {},
|
|
738
|
-
"outputs": [
|
|
739
|
-
{
|
|
740
|
-
"name": "stdout",
|
|
741
|
-
"output_type": "stream",
|
|
742
|
-
"text": [
|
|
743
|
-
"Equivalent SQL: select REGR_count(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
744
|
-
"\n",
|
|
745
|
-
"\n",
|
|
746
|
-
" ************************* DataFrame ********************* \n",
|
|
747
|
-
" assined_col_\n",
|
|
748
|
-
"0 40\n",
|
|
749
|
-
"\n",
|
|
750
|
-
"\n",
|
|
751
|
-
"\n",
|
|
752
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
753
|
-
"assined_col_ int\n",
|
|
754
|
-
"\n",
|
|
755
|
-
"\n",
|
|
756
|
-
"\n",
|
|
757
|
-
" 'assined_col_' Column Type: INTEGER\n"
|
|
758
|
-
]
|
|
759
|
-
}
|
|
760
|
-
],
|
|
761
|
-
"source": [
|
|
762
|
-
"df = admissions_train.assign(True, \n",
|
|
763
|
-
" assined_col_=func.REGR_count(admissions_train.admitted.expression, \n",
|
|
764
|
-
" admissions_train.gpa.expression))\n",
|
|
765
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
766
|
-
]
|
|
767
|
-
},
|
|
768
|
-
{
|
|
769
|
-
"cell_type": "markdown",
|
|
770
|
-
"metadata": {},
|
|
771
|
-
"source": [
|
|
772
|
-
"## REGR_Intercept Function"
|
|
773
|
-
]
|
|
774
|
-
},
|
|
775
|
-
{
|
|
776
|
-
"cell_type": "code",
|
|
777
|
-
"execution_count": 30,
|
|
778
|
-
"metadata": {},
|
|
779
|
-
"outputs": [],
|
|
780
|
-
"source": [
|
|
781
|
-
"# Function returns the intercept of the univariate linear regression line through all non-null data pairs of the \n",
|
|
782
|
-
"# dependent and independent variable arguments.\n",
|
|
783
|
-
"# Syntax:\n",
|
|
784
|
-
"# REGR_Intercept(dependent_value_expression, independent_value_expression)"
|
|
785
|
-
]
|
|
786
|
-
},
|
|
787
|
-
{
|
|
788
|
-
"cell_type": "code",
|
|
789
|
-
"execution_count": 31,
|
|
790
|
-
"metadata": {},
|
|
791
|
-
"outputs": [
|
|
792
|
-
{
|
|
793
|
-
"name": "stdout",
|
|
794
|
-
"output_type": "stream",
|
|
795
|
-
"text": [
|
|
796
|
-
"Equivalent SQL: select REGR_Intercept(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
797
|
-
"\n",
|
|
798
|
-
"\n",
|
|
799
|
-
" ************************* DataFrame ********************* \n",
|
|
800
|
-
" assined_col_\n",
|
|
801
|
-
"0 0.724144\n",
|
|
802
|
-
"\n",
|
|
803
|
-
"\n",
|
|
804
|
-
"\n",
|
|
805
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
806
|
-
"assined_col_ float\n",
|
|
807
|
-
"\n",
|
|
808
|
-
"\n",
|
|
809
|
-
"\n",
|
|
810
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
811
|
-
]
|
|
812
|
-
}
|
|
813
|
-
],
|
|
814
|
-
"source": [
|
|
815
|
-
"df = admissions_train.assign(True, \n",
|
|
816
|
-
" assined_col_=func.REGR_Intercept(admissions_train.admitted.expression, \n",
|
|
817
|
-
" admissions_train.gpa.expression))\n",
|
|
818
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
819
|
-
]
|
|
820
|
-
},
|
|
821
|
-
{
|
|
822
|
-
"cell_type": "markdown",
|
|
823
|
-
"metadata": {},
|
|
824
|
-
"source": [
|
|
825
|
-
"## REGR_R2 Function"
|
|
826
|
-
]
|
|
827
|
-
},
|
|
828
|
-
{
|
|
829
|
-
"cell_type": "code",
|
|
830
|
-
"execution_count": 32,
|
|
831
|
-
"metadata": {},
|
|
832
|
-
"outputs": [],
|
|
833
|
-
"source": [
|
|
834
|
-
"# Function returns the coefficient of determination for all non-null data pairs of the dependent and independent \n",
|
|
835
|
-
"# variable arguments.\n",
|
|
836
|
-
"# Syntax:\n",
|
|
837
|
-
"# REGR_R2(dependent_value_expression, independent_value_expression)"
|
|
838
|
-
]
|
|
839
|
-
},
|
|
840
|
-
{
|
|
841
|
-
"cell_type": "code",
|
|
842
|
-
"execution_count": 33,
|
|
843
|
-
"metadata": {},
|
|
844
|
-
"outputs": [
|
|
845
|
-
{
|
|
846
|
-
"name": "stdout",
|
|
847
|
-
"output_type": "stream",
|
|
848
|
-
"text": [
|
|
849
|
-
"Equivalent SQL: select REGR_R2(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
850
|
-
"\n",
|
|
851
|
-
"\n",
|
|
852
|
-
" ************************* DataFrame ********************* \n",
|
|
853
|
-
" assined_col_\n",
|
|
854
|
-
"0 0.000496\n",
|
|
855
|
-
"\n",
|
|
856
|
-
"\n",
|
|
857
|
-
"\n",
|
|
858
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
859
|
-
"assined_col_ float\n",
|
|
860
|
-
"\n",
|
|
861
|
-
"\n",
|
|
862
|
-
"\n",
|
|
863
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
864
|
-
]
|
|
865
|
-
}
|
|
866
|
-
],
|
|
867
|
-
"source": [
|
|
868
|
-
"df = admissions_train.assign(True, \n",
|
|
869
|
-
" assined_col_=func.REGR_R2(admissions_train.admitted.expression, \n",
|
|
870
|
-
" admissions_train.gpa.expression))\n",
|
|
871
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
872
|
-
]
|
|
873
|
-
},
|
|
874
|
-
{
|
|
875
|
-
"cell_type": "markdown",
|
|
876
|
-
"metadata": {},
|
|
877
|
-
"source": [
|
|
878
|
-
"## REGR_SLOPE Function"
|
|
879
|
-
]
|
|
880
|
-
},
|
|
881
|
-
{
|
|
882
|
-
"cell_type": "code",
|
|
883
|
-
"execution_count": 34,
|
|
884
|
-
"metadata": {},
|
|
885
|
-
"outputs": [],
|
|
886
|
-
"source": [
|
|
887
|
-
"# Function returns the slope of the univariate linear regression line through all non-null data pairs of the \n",
|
|
888
|
-
"# dependent and independent variable arguments.\n",
|
|
889
|
-
"# Syntax:\n",
|
|
890
|
-
"# REGR_SLOPE(dependent_value_expression, independent_value_expression)"
|
|
891
|
-
]
|
|
892
|
-
},
|
|
893
|
-
{
|
|
894
|
-
"cell_type": "code",
|
|
895
|
-
"execution_count": 35,
|
|
896
|
-
"metadata": {},
|
|
897
|
-
"outputs": [
|
|
898
|
-
{
|
|
899
|
-
"name": "stdout",
|
|
900
|
-
"output_type": "stream",
|
|
901
|
-
"text": [
|
|
902
|
-
"Equivalent SQL: select REGR_SLOPE(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
903
|
-
"\n",
|
|
904
|
-
"\n",
|
|
905
|
-
" ************************* DataFrame ********************* \n",
|
|
906
|
-
" assined_col_\n",
|
|
907
|
-
"0 -0.020934\n",
|
|
908
|
-
"\n",
|
|
909
|
-
"\n",
|
|
910
|
-
"\n",
|
|
911
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
912
|
-
"assined_col_ float\n",
|
|
913
|
-
"\n",
|
|
914
|
-
"\n",
|
|
915
|
-
"\n",
|
|
916
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
917
|
-
]
|
|
918
|
-
}
|
|
919
|
-
],
|
|
920
|
-
"source": [
|
|
921
|
-
"df = admissions_train.assign(True, \n",
|
|
922
|
-
" assined_col_=func.REGR_SLOPE(admissions_train.admitted.expression, \n",
|
|
923
|
-
" admissions_train.gpa.expression))\n",
|
|
924
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
925
|
-
]
|
|
926
|
-
},
|
|
927
|
-
{
|
|
928
|
-
"cell_type": "markdown",
|
|
929
|
-
"metadata": {},
|
|
930
|
-
"source": [
|
|
931
|
-
"## REGR_SXX Function"
|
|
932
|
-
]
|
|
933
|
-
},
|
|
934
|
-
{
|
|
935
|
-
"cell_type": "code",
|
|
936
|
-
"execution_count": 36,
|
|
937
|
-
"metadata": {},
|
|
938
|
-
"outputs": [],
|
|
939
|
-
"source": [
|
|
940
|
-
"# Function returns the sum of the squares of the independent_variable_expression for all non-null data pairs of the \n",
|
|
941
|
-
"# dependent and independent variable arguments.\n",
|
|
942
|
-
"# Syntax:\n",
|
|
943
|
-
"# REGR_SXX(dependent_value_expression, independent_value_expression)"
|
|
944
|
-
]
|
|
945
|
-
},
|
|
946
|
-
{
|
|
947
|
-
"cell_type": "code",
|
|
948
|
-
"execution_count": 37,
|
|
949
|
-
"metadata": {},
|
|
950
|
-
"outputs": [
|
|
951
|
-
{
|
|
952
|
-
"name": "stdout",
|
|
953
|
-
"output_type": "stream",
|
|
954
|
-
"text": [
|
|
955
|
-
"Equivalent SQL: select REGR_SXX(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
956
|
-
"\n",
|
|
957
|
-
"\n",
|
|
958
|
-
" ************************* DataFrame ********************* \n",
|
|
959
|
-
" assined_col_\n",
|
|
960
|
-
"0 10.294177\n",
|
|
961
|
-
"\n",
|
|
962
|
-
"\n",
|
|
963
|
-
"\n",
|
|
964
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
965
|
-
"assined_col_ float\n",
|
|
966
|
-
"\n",
|
|
967
|
-
"\n",
|
|
968
|
-
"\n",
|
|
969
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
970
|
-
]
|
|
971
|
-
}
|
|
972
|
-
],
|
|
973
|
-
"source": [
|
|
974
|
-
"df = admissions_train.assign(True, \n",
|
|
975
|
-
" assined_col_=func.REGR_SXX(admissions_train.admitted.expression, \n",
|
|
976
|
-
" admissions_train.gpa.expression))\n",
|
|
977
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
978
|
-
]
|
|
979
|
-
},
|
|
980
|
-
{
|
|
981
|
-
"cell_type": "markdown",
|
|
982
|
-
"metadata": {},
|
|
983
|
-
"source": [
|
|
984
|
-
"## REGR_SXY Function"
|
|
985
|
-
]
|
|
986
|
-
},
|
|
987
|
-
{
|
|
988
|
-
"cell_type": "code",
|
|
989
|
-
"execution_count": 38,
|
|
990
|
-
"metadata": {},
|
|
991
|
-
"outputs": [],
|
|
992
|
-
"source": [
|
|
993
|
-
"# Function returns the sum of the products of the independent_variable_expression and the dependent_variable_expression \n",
|
|
994
|
-
"# for all non-null data pairs of the dependent and independent variable arguments.\n",
|
|
995
|
-
"# Syntax:\n",
|
|
996
|
-
"# REGR_SXY(dependent_value_expression, independent_value_expression)"
|
|
997
|
-
]
|
|
998
|
-
},
|
|
999
|
-
{
|
|
1000
|
-
"cell_type": "code",
|
|
1001
|
-
"execution_count": 39,
|
|
1002
|
-
"metadata": {},
|
|
1003
|
-
"outputs": [
|
|
1004
|
-
{
|
|
1005
|
-
"name": "stdout",
|
|
1006
|
-
"output_type": "stream",
|
|
1007
|
-
"text": [
|
|
1008
|
-
"Equivalent SQL: select REGR_SXY(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1009
|
-
"\n",
|
|
1010
|
-
"\n",
|
|
1011
|
-
" ************************* DataFrame ********************* \n",
|
|
1012
|
-
" assined_col_\n",
|
|
1013
|
-
"0 -0.2155\n",
|
|
1014
|
-
"\n",
|
|
1015
|
-
"\n",
|
|
1016
|
-
"\n",
|
|
1017
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1018
|
-
"assined_col_ float\n",
|
|
1019
|
-
"\n",
|
|
1020
|
-
"\n",
|
|
1021
|
-
"\n",
|
|
1022
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
1023
|
-
]
|
|
1024
|
-
}
|
|
1025
|
-
],
|
|
1026
|
-
"source": [
|
|
1027
|
-
"df = admissions_train.assign(True, \n",
|
|
1028
|
-
" assined_col_=func.REGR_SXY(admissions_train.admitted.expression, \n",
|
|
1029
|
-
" admissions_train.gpa.expression))\n",
|
|
1030
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
1031
|
-
]
|
|
1032
|
-
},
|
|
1033
|
-
{
|
|
1034
|
-
"cell_type": "markdown",
|
|
1035
|
-
"metadata": {},
|
|
1036
|
-
"source": [
|
|
1037
|
-
"## REGR_SYY Function"
|
|
1038
|
-
]
|
|
1039
|
-
},
|
|
1040
|
-
{
|
|
1041
|
-
"cell_type": "code",
|
|
1042
|
-
"execution_count": 40,
|
|
1043
|
-
"metadata": {},
|
|
1044
|
-
"outputs": [],
|
|
1045
|
-
"source": [
|
|
1046
|
-
"# Function returns the sum of the squares of the dependent_variable_expression for all non-null data pairs of the \n",
|
|
1047
|
-
"# dependent and independent variable arguments.\n",
|
|
1048
|
-
"# Syntax:\n",
|
|
1049
|
-
"# REGR_SYY(dependent_value_expression, independent_value_expression)"
|
|
1050
|
-
]
|
|
1051
|
-
},
|
|
1052
|
-
{
|
|
1053
|
-
"cell_type": "code",
|
|
1054
|
-
"execution_count": 41,
|
|
1055
|
-
"metadata": {},
|
|
1056
|
-
"outputs": [
|
|
1057
|
-
{
|
|
1058
|
-
"name": "stdout",
|
|
1059
|
-
"output_type": "stream",
|
|
1060
|
-
"text": [
|
|
1061
|
-
"Equivalent SQL: select REGR_SYY(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1062
|
-
"\n",
|
|
1063
|
-
"\n",
|
|
1064
|
-
" ************************* DataFrame ********************* \n",
|
|
1065
|
-
" assined_col_\n",
|
|
1066
|
-
"0 9.1\n",
|
|
1067
|
-
"\n",
|
|
1068
|
-
"\n",
|
|
1069
|
-
"\n",
|
|
1070
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1071
|
-
"assined_col_ float\n",
|
|
1072
|
-
"\n",
|
|
1073
|
-
"\n",
|
|
1074
|
-
"\n",
|
|
1075
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
1076
|
-
]
|
|
1077
|
-
}
|
|
1078
|
-
],
|
|
1079
|
-
"source": [
|
|
1080
|
-
"df = admissions_train.assign(True, \n",
|
|
1081
|
-
" assined_col_=func.REGR_SYY(admissions_train.admitted.expression, \n",
|
|
1082
|
-
" admissions_train.gpa.expression))\n",
|
|
1083
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
1084
|
-
]
|
|
1085
|
-
},
|
|
1086
|
-
{
|
|
1087
|
-
"cell_type": "markdown",
|
|
1088
|
-
"metadata": {},
|
|
1089
|
-
"source": [
|
|
1090
|
-
"## Skew Function"
|
|
1091
|
-
]
|
|
1092
|
-
},
|
|
1093
|
-
{
|
|
1094
|
-
"cell_type": "code",
|
|
1095
|
-
"execution_count": 42,
|
|
1096
|
-
"metadata": {},
|
|
1097
|
-
"outputs": [],
|
|
1098
|
-
"source": [
|
|
1099
|
-
"# Function returns the skewness of the distribution of value_expression.\n",
|
|
1100
|
-
"# Syntax:\n",
|
|
1101
|
-
"# skew(value_expression)"
|
|
1102
|
-
]
|
|
1103
|
-
},
|
|
1104
|
-
{
|
|
1105
|
-
"cell_type": "code",
|
|
1106
|
-
"execution_count": 43,
|
|
1107
|
-
"metadata": {},
|
|
1108
|
-
"outputs": [
|
|
1109
|
-
{
|
|
1110
|
-
"name": "stdout",
|
|
1111
|
-
"output_type": "stream",
|
|
1112
|
-
"text": [
|
|
1113
|
-
"Equivalent SQL: select skew(gpa) AS assined_col_float, skew(admitted) AS assined_col_int from \"admissions_train\"\n",
|
|
1114
|
-
"\n",
|
|
1115
|
-
"\n",
|
|
1116
|
-
" ************************* DataFrame ********************* \n",
|
|
1117
|
-
" assined_col_float assined_col_int\n",
|
|
1118
|
-
"0 -2.058969 -0.653746\n",
|
|
1119
|
-
"\n",
|
|
1120
|
-
"\n",
|
|
1121
|
-
"\n",
|
|
1122
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1123
|
-
"assined_col_float float\n",
|
|
1124
|
-
"assined_col_int float\n",
|
|
1125
|
-
"\n",
|
|
1126
|
-
"\n",
|
|
1127
|
-
"\n",
|
|
1128
|
-
" 'assined_col_int' Column Type: FLOAT\n",
|
|
1129
|
-
" 'assined_col_float' Column Type: FLOAT\n"
|
|
1130
|
-
]
|
|
1131
|
-
}
|
|
1132
|
-
],
|
|
1133
|
-
"source": [
|
|
1134
|
-
"df = admissions_train.assign(True, assined_col_int=func.skew(admissions_train.admitted.expression),\n",
|
|
1135
|
-
" assined_col_float=func.skew(admissions_train.gpa.expression))\n",
|
|
1136
|
-
"print_variables(df, [\"assined_col_int\", \"assined_col_float\"])"
|
|
1137
|
-
]
|
|
1138
|
-
},
|
|
1139
|
-
{
|
|
1140
|
-
"cell_type": "markdown",
|
|
1141
|
-
"metadata": {},
|
|
1142
|
-
"source": [
|
|
1143
|
-
"## stddev_pop Function"
|
|
1144
|
-
]
|
|
1145
|
-
},
|
|
1146
|
-
{
|
|
1147
|
-
"cell_type": "code",
|
|
1148
|
-
"execution_count": 44,
|
|
1149
|
-
"metadata": {},
|
|
1150
|
-
"outputs": [],
|
|
1151
|
-
"source": [
|
|
1152
|
-
"# Function returns the population standard deviation for the non-null data points in value_expression.\n",
|
|
1153
|
-
"# Syntax:\n",
|
|
1154
|
-
"# stddev_pop(value_expression)"
|
|
1155
|
-
]
|
|
1156
|
-
},
|
|
1157
|
-
{
|
|
1158
|
-
"cell_type": "code",
|
|
1159
|
-
"execution_count": 45,
|
|
1160
|
-
"metadata": {},
|
|
1161
|
-
"outputs": [
|
|
1162
|
-
{
|
|
1163
|
-
"name": "stdout",
|
|
1164
|
-
"output_type": "stream",
|
|
1165
|
-
"text": [
|
|
1166
|
-
"Equivalent SQL: select stddev_pop(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1167
|
-
"\n",
|
|
1168
|
-
"\n",
|
|
1169
|
-
" ************************* DataFrame ********************* \n",
|
|
1170
|
-
" assined_col_\n",
|
|
1171
|
-
"0 0.507301\n",
|
|
1172
|
-
"\n",
|
|
1173
|
-
"\n",
|
|
1174
|
-
"\n",
|
|
1175
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1176
|
-
"assined_col_ float\n",
|
|
1177
|
-
"\n",
|
|
1178
|
-
"\n",
|
|
1179
|
-
"\n",
|
|
1180
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
1181
|
-
]
|
|
1182
|
-
}
|
|
1183
|
-
],
|
|
1184
|
-
"source": [
|
|
1185
|
-
"df = admissions_train.assign(True, assined_col_=func.stddev_pop(admissions_train.gpa.expression))\n",
|
|
1186
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
1187
|
-
]
|
|
1188
|
-
},
|
|
1189
|
-
{
|
|
1190
|
-
"cell_type": "markdown",
|
|
1191
|
-
"metadata": {},
|
|
1192
|
-
"source": [
|
|
1193
|
-
"## stddev_samp Function"
|
|
1194
|
-
]
|
|
1195
|
-
},
|
|
1196
|
-
{
|
|
1197
|
-
"cell_type": "code",
|
|
1198
|
-
"execution_count": 46,
|
|
1199
|
-
"metadata": {},
|
|
1200
|
-
"outputs": [],
|
|
1201
|
-
"source": [
|
|
1202
|
-
"# Function returns the sample standard deviation for the non-null data points in value_expression.\n",
|
|
1203
|
-
"# Syntax:\n",
|
|
1204
|
-
"# stddev_samp(value_expression)"
|
|
1205
|
-
]
|
|
1206
|
-
},
|
|
1207
|
-
{
|
|
1208
|
-
"cell_type": "code",
|
|
1209
|
-
"execution_count": 47,
|
|
1210
|
-
"metadata": {},
|
|
1211
|
-
"outputs": [
|
|
1212
|
-
{
|
|
1213
|
-
"name": "stdout",
|
|
1214
|
-
"output_type": "stream",
|
|
1215
|
-
"text": [
|
|
1216
|
-
"Equivalent SQL: select stddev_samp(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1217
|
-
"\n",
|
|
1218
|
-
"\n",
|
|
1219
|
-
" ************************* DataFrame ********************* \n",
|
|
1220
|
-
" assined_col_\n",
|
|
1221
|
-
"0 0.513764\n",
|
|
1222
|
-
"\n",
|
|
1223
|
-
"\n",
|
|
1224
|
-
"\n",
|
|
1225
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1226
|
-
"assined_col_ float\n",
|
|
1227
|
-
"\n",
|
|
1228
|
-
"\n",
|
|
1229
|
-
"\n",
|
|
1230
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
1231
|
-
]
|
|
1232
|
-
}
|
|
1233
|
-
],
|
|
1234
|
-
"source": [
|
|
1235
|
-
"df = admissions_train.assign(True, assined_col_=func.stddev_samp(admissions_train.gpa.expression))\n",
|
|
1236
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
1237
|
-
]
|
|
1238
|
-
},
|
|
1239
|
-
{
|
|
1240
|
-
"cell_type": "markdown",
|
|
1241
|
-
"metadata": {},
|
|
1242
|
-
"source": [
|
|
1243
|
-
"## sum Function"
|
|
1244
|
-
]
|
|
1245
|
-
},
|
|
1246
|
-
{
|
|
1247
|
-
"cell_type": "code",
|
|
1248
|
-
"execution_count": 48,
|
|
1249
|
-
"metadata": {},
|
|
1250
|
-
"outputs": [],
|
|
1251
|
-
"source": [
|
|
1252
|
-
"# Function returns a column value that is the arithmetic sum of value_expression.\n",
|
|
1253
|
-
"# Syntax:\n",
|
|
1254
|
-
"# sum(value_expression)"
|
|
1255
|
-
]
|
|
1256
|
-
},
|
|
1257
|
-
{
|
|
1258
|
-
"cell_type": "code",
|
|
1259
|
-
"execution_count": 49,
|
|
1260
|
-
"metadata": {},
|
|
1261
|
-
"outputs": [
|
|
1262
|
-
{
|
|
1263
|
-
"name": "stdout",
|
|
1264
|
-
"output_type": "stream",
|
|
1265
|
-
"text": [
|
|
1266
|
-
"Equivalent SQL: select sum(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1267
|
-
"\n",
|
|
1268
|
-
"\n",
|
|
1269
|
-
" ************************* DataFrame ********************* \n",
|
|
1270
|
-
" assined_col_\n",
|
|
1271
|
-
"0 141.67\n",
|
|
1272
|
-
"\n",
|
|
1273
|
-
"\n",
|
|
1274
|
-
"\n",
|
|
1275
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1276
|
-
"assined_col_ float\n",
|
|
1277
|
-
"\n",
|
|
1278
|
-
"\n",
|
|
1279
|
-
"\n",
|
|
1280
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
1281
|
-
]
|
|
1282
|
-
}
|
|
1283
|
-
],
|
|
1284
|
-
"source": [
|
|
1285
|
-
"df = admissions_train.assign(True, assined_col_=func.sum(admissions_train.gpa.expression))\n",
|
|
1286
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
1287
|
-
]
|
|
1288
|
-
},
|
|
1289
|
-
{
|
|
1290
|
-
"cell_type": "markdown",
|
|
1291
|
-
"metadata": {},
|
|
1292
|
-
"source": [
|
|
1293
|
-
"## var_pop Function"
|
|
1294
|
-
]
|
|
1295
|
-
},
|
|
1296
|
-
{
|
|
1297
|
-
"cell_type": "code",
|
|
1298
|
-
"execution_count": 50,
|
|
1299
|
-
"metadata": {},
|
|
1300
|
-
"outputs": [],
|
|
1301
|
-
"source": [
|
|
1302
|
-
"# Function returns the population variance for the data points in value_expression.\n",
|
|
1303
|
-
"# Syntax:\n",
|
|
1304
|
-
"# var_pop(value_expression)"
|
|
1305
|
-
]
|
|
1306
|
-
},
|
|
1307
|
-
{
|
|
1308
|
-
"cell_type": "code",
|
|
1309
|
-
"execution_count": 51,
|
|
1310
|
-
"metadata": {},
|
|
1311
|
-
"outputs": [
|
|
1312
|
-
{
|
|
1313
|
-
"name": "stdout",
|
|
1314
|
-
"output_type": "stream",
|
|
1315
|
-
"text": [
|
|
1316
|
-
"Equivalent SQL: select var_pop(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1317
|
-
"\n",
|
|
1318
|
-
"\n",
|
|
1319
|
-
" ************************* DataFrame ********************* \n",
|
|
1320
|
-
" assined_col_\n",
|
|
1321
|
-
"0 0.257354\n",
|
|
1322
|
-
"\n",
|
|
1323
|
-
"\n",
|
|
1324
|
-
"\n",
|
|
1325
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1326
|
-
"assined_col_ float\n",
|
|
1327
|
-
"\n",
|
|
1328
|
-
"\n",
|
|
1329
|
-
"\n",
|
|
1330
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
1331
|
-
]
|
|
1332
|
-
}
|
|
1333
|
-
],
|
|
1334
|
-
"source": [
|
|
1335
|
-
"df = admissions_train.assign(True, assined_col_=func.var_pop(admissions_train.gpa.expression))\n",
|
|
1336
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
1337
|
-
]
|
|
1338
|
-
},
|
|
1339
|
-
{
|
|
1340
|
-
"cell_type": "markdown",
|
|
1341
|
-
"metadata": {},
|
|
1342
|
-
"source": [
|
|
1343
|
-
"## var_samp Function"
|
|
1344
|
-
]
|
|
1345
|
-
},
|
|
1346
|
-
{
|
|
1347
|
-
"cell_type": "code",
|
|
1348
|
-
"execution_count": 52,
|
|
1349
|
-
"metadata": {},
|
|
1350
|
-
"outputs": [],
|
|
1351
|
-
"source": [
|
|
1352
|
-
"# Function returns the sample variance for the data points in value_expression.\n",
|
|
1353
|
-
"# Syntax:\n",
|
|
1354
|
-
"# var_samp(value_expression)"
|
|
1355
|
-
]
|
|
1356
|
-
},
|
|
1357
|
-
{
|
|
1358
|
-
"cell_type": "code",
|
|
1359
|
-
"execution_count": 53,
|
|
1360
|
-
"metadata": {},
|
|
1361
|
-
"outputs": [
|
|
1362
|
-
{
|
|
1363
|
-
"name": "stdout",
|
|
1364
|
-
"output_type": "stream",
|
|
1365
|
-
"text": [
|
|
1366
|
-
"Equivalent SQL: select var_samp(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1367
|
-
"\n",
|
|
1368
|
-
"\n",
|
|
1369
|
-
" ************************* DataFrame ********************* \n",
|
|
1370
|
-
" assined_col_\n",
|
|
1371
|
-
"0 0.263953\n",
|
|
1372
|
-
"\n",
|
|
1373
|
-
"\n",
|
|
1374
|
-
"\n",
|
|
1375
|
-
" ************************* DataFrame.dtypes ********************* \n",
|
|
1376
|
-
"assined_col_ float\n",
|
|
1377
|
-
"\n",
|
|
1378
|
-
"\n",
|
|
1379
|
-
"\n",
|
|
1380
|
-
" 'assined_col_' Column Type: FLOAT\n"
|
|
1381
|
-
]
|
|
1382
|
-
}
|
|
1383
|
-
],
|
|
1384
|
-
"source": [
|
|
1385
|
-
"df = admissions_train.assign(True, assined_col_=func.var_samp(admissions_train.gpa.expression))\n",
|
|
1386
|
-
"print_variables(df, [\"assined_col_\"])"
|
|
1387
|
-
]
|
|
1388
|
-
},
|
|
1389
|
-
{
|
|
1390
|
-
"cell_type": "code",
|
|
1391
|
-
"execution_count": 54,
|
|
1392
|
-
"metadata": {},
|
|
1393
|
-
"outputs": [
|
|
1394
|
-
{
|
|
1395
|
-
"data": {
|
|
1396
|
-
"text/plain": [
|
|
1397
|
-
"True"
|
|
1398
|
-
]
|
|
1399
|
-
},
|
|
1400
|
-
"execution_count": 54,
|
|
1401
|
-
"metadata": {},
|
|
1402
|
-
"output_type": "execute_result"
|
|
1403
|
-
}
|
|
1404
|
-
],
|
|
1405
|
-
"source": [
|
|
1406
|
-
"# One must run remove_context() to close the connection and garbage collect internally generated objects.\n",
|
|
1407
|
-
"remove_context()"
|
|
1408
|
-
]
|
|
1409
|
-
},
|
|
1410
|
-
{
|
|
1411
|
-
"cell_type": "code",
|
|
1412
|
-
"execution_count": 55,
|
|
1413
|
-
"metadata": {},
|
|
1414
|
-
"outputs": [],
|
|
1415
|
-
"source": [
|
|
1416
|
-
"## Grouping, pivot, unpivot - Not possible to use."
|
|
1417
|
-
]
|
|
1418
|
-
},
|
|
1419
|
-
{
|
|
1420
|
-
"cell_type": "code",
|
|
1421
|
-
"execution_count": null,
|
|
1422
|
-
"metadata": {},
|
|
1423
|
-
"outputs": [],
|
|
1424
|
-
"source": []
|
|
1425
|
-
},
|
|
1426
|
-
{
|
|
1427
|
-
"cell_type": "code",
|
|
1428
|
-
"execution_count": null,
|
|
1429
|
-
"metadata": {},
|
|
1430
|
-
"outputs": [],
|
|
1431
|
-
"source": []
|
|
1432
|
-
}
|
|
1433
|
-
],
|
|
1434
|
-
"metadata": {
|
|
1435
|
-
"kernelspec": {
|
|
1436
|
-
"display_name": "Python 3",
|
|
1437
|
-
"language": "python",
|
|
1438
|
-
"name": "python3"
|
|
1439
|
-
},
|
|
1440
|
-
"language_info": {
|
|
1441
|
-
"codemirror_mode": {
|
|
1442
|
-
"name": "ipython",
|
|
1443
|
-
"version": 3
|
|
1444
|
-
},
|
|
1445
|
-
"file_extension": ".py",
|
|
1446
|
-
"mimetype": "text/x-python",
|
|
1447
|
-
"name": "python",
|
|
1448
|
-
"nbconvert_exporter": "python",
|
|
1449
|
-
"pygments_lexer": "ipython3",
|
|
1450
|
-
"version": "3.7.1"
|
|
1451
|
-
}
|
|
1452
|
-
},
|
|
1453
|
-
"nbformat": 4,
|
|
1454
|
-
"nbformat_minor": 2
|
|
1455
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"metadata": {},
|
|
6
|
+
"source": [
|
|
7
|
+
"### Disclaimer\n",
|
|
8
|
+
"Please note, the Vantage Functions via SQLAlchemy feature is a preview/beta code release with limited functionality (the “Code”). As such, you acknowledge that the Code is experimental in nature and that the Code is provided “AS IS” and may not be functional on any machine or in any environment. TERADATA DISCLAIMS ALL WARRANTIES RELATING TO THE CODE, EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES AGAINST INFRINGEMENT OF THIRD-PARTY RIGHTS, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.\n",
|
|
9
|
+
"\n",
|
|
10
|
+
"TERADATA SHALL NOT BE RESPONSIBLE OR LIABLE WITH RESPECT TO ANY SUBJECT MATTER OF THE CODE UNDER ANY CONTRACT, NEGLIGENCE, STRICT LIABILITY OR OTHER THEORY \n",
|
|
11
|
+
" (A) FOR LOSS OR INACCURACY OF DATA OR COST OF PROCUREMENT OF SUBSTITUTE GOODS, SERVICES OR TECHNOLOGY, OR \n",
|
|
12
|
+
" (B) FOR ANY INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO LOSS OF REVENUES AND LOSS OF PROFITS. TERADATA SHALL NOT BE RESPONSIBLE FOR ANY MATTER BEYOND ITS REASONABLE CONTROL.\n",
|
|
13
|
+
"\n",
|
|
14
|
+
"Notwithstanding anything to the contrary: \n",
|
|
15
|
+
" (a) Teradata will have no obligation of any kind with respect to any Code-related comments, suggestions, design changes or improvements that you elect to provide to Teradata in either verbal or written form (collectively, “Feedback”), and \n",
|
|
16
|
+
" (b) Teradata and its affiliates are hereby free to use any ideas, concepts, know-how or techniques, in whole or in part, contained in Feedback: \n",
|
|
17
|
+
" (i) for any purpose whatsoever, including developing, manufacturing, and/or marketing products and/or services incorporating Feedback in whole or in part, and \n",
|
|
18
|
+
" (ii) without any restrictions or limitations, including requiring the payment of any license fees, royalties, or other consideration. "
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"cell_type": "code",
|
|
23
|
+
"execution_count": 1,
|
|
24
|
+
"metadata": {},
|
|
25
|
+
"outputs": [],
|
|
26
|
+
"source": [
|
|
27
|
+
"# In this notebook, we will be covering examples for following Regular Aggregate Functions\n",
|
|
28
|
+
"# SQL Documentation: https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/c2fX4dzxCcDJFKqXbyQtTA\n",
|
|
29
|
+
" # 1. avg/average/ave\n",
|
|
30
|
+
" # 2. corr\n",
|
|
31
|
+
" # 3. count\n",
|
|
32
|
+
" # 4. covar_pop\n",
|
|
33
|
+
" # 5. covar_samp\n",
|
|
34
|
+
" # 6. var_pop\n",
|
|
35
|
+
" # 7. var_samp\n",
|
|
36
|
+
" # 8. kurtosis\n",
|
|
37
|
+
" # 9. max\n",
|
|
38
|
+
" # 10. REGR_AVGX\n",
|
|
39
|
+
" # 11. REGR_AVGY\n",
|
|
40
|
+
" # 12. REGR_Intercept\n",
|
|
41
|
+
" # 13. REGR_SLOPE\n",
|
|
42
|
+
" # 14. REGR_R2\n",
|
|
43
|
+
" # 15. REGR_SXX\n",
|
|
44
|
+
" # 16. REGR_SXY\n",
|
|
45
|
+
" # 17. REGR_SYY\n",
|
|
46
|
+
" # 18. min\n",
|
|
47
|
+
" # 19. skew\n",
|
|
48
|
+
" # 20. stddev_pop\n",
|
|
49
|
+
" # 21. stddev_samp\n",
|
|
50
|
+
" # 22. sum"
|
|
51
|
+
]
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"cell_type": "code",
|
|
55
|
+
"execution_count": 2,
|
|
56
|
+
"metadata": {},
|
|
57
|
+
"outputs": [
|
|
58
|
+
{
|
|
59
|
+
"name": "stdout",
|
|
60
|
+
"output_type": "stream",
|
|
61
|
+
"text": [
|
|
62
|
+
"Hostname: ········\n",
|
|
63
|
+
"Username: ········\n",
|
|
64
|
+
"Password: ········\n",
|
|
65
|
+
"WARNING: Skipped loading table admissions_train since it already exists in the database.\n"
|
|
66
|
+
]
|
|
67
|
+
}
|
|
68
|
+
],
|
|
69
|
+
"source": [
|
|
70
|
+
"# Get the connection to the Vantage using create_context()\n",
|
|
71
|
+
"from teradataml import *\n",
|
|
72
|
+
"import getpass\n",
|
|
73
|
+
"td_context = create_context(host=getpass.getpass(\"Hostname: \"), username=getpass.getpass(\"Username: \"), password=getpass.getpass(\"Password: \"))\n",
|
|
74
|
+
"# Load the example dataset.\n",
|
|
75
|
+
"load_example_data(\"GLM\", [\"admissions_train\"])"
|
|
76
|
+
]
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
"cell_type": "code",
|
|
80
|
+
"execution_count": 3,
|
|
81
|
+
"metadata": {},
|
|
82
|
+
"outputs": [
|
|
83
|
+
{
|
|
84
|
+
"data": {
|
|
85
|
+
"text/plain": [
|
|
86
|
+
" masters gpa stats programming admitted\n",
|
|
87
|
+
"id \n",
|
|
88
|
+
"15 yes 4.00 Advanced Advanced 1\n",
|
|
89
|
+
"7 yes 2.33 Novice Novice 1\n",
|
|
90
|
+
"22 yes 3.46 Novice Beginner 0\n",
|
|
91
|
+
"17 no 3.83 Advanced Advanced 1\n",
|
|
92
|
+
"13 no 4.00 Advanced Novice 1\n",
|
|
93
|
+
"38 yes 2.65 Advanced Beginner 1\n",
|
|
94
|
+
"26 yes 3.57 Advanced Advanced 1\n",
|
|
95
|
+
"5 no 3.44 Novice Novice 0\n",
|
|
96
|
+
"34 yes 3.85 Advanced Beginner 0\n",
|
|
97
|
+
"40 yes 3.95 Novice Beginner 0"
|
|
98
|
+
]
|
|
99
|
+
},
|
|
100
|
+
"execution_count": 3,
|
|
101
|
+
"metadata": {},
|
|
102
|
+
"output_type": "execute_result"
|
|
103
|
+
}
|
|
104
|
+
],
|
|
105
|
+
"source": [
|
|
106
|
+
"# Create the DataFrame on 'admissions_train' table\n",
|
|
107
|
+
"admissions_train = DataFrame(\"admissions_train\")\n",
|
|
108
|
+
"admissions_train"
|
|
109
|
+
]
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"cell_type": "code",
|
|
113
|
+
"execution_count": 4,
|
|
114
|
+
"metadata": {},
|
|
115
|
+
"outputs": [],
|
|
116
|
+
"source": [
|
|
117
|
+
"def print_variables(df, columns):\n",
|
|
118
|
+
" print(\"Equivalent SQL: {}\".format(df.show_query()))\n",
|
|
119
|
+
" print(\"\\n\")\n",
|
|
120
|
+
" print(\" ************************* DataFrame ********************* \")\n",
|
|
121
|
+
" print(df)\n",
|
|
122
|
+
" print(\"\\n\\n\")\n",
|
|
123
|
+
" print(\" ************************* DataFrame.dtypes ********************* \")\n",
|
|
124
|
+
" print(df.dtypes)\n",
|
|
125
|
+
" print(\"\\n\\n\")\n",
|
|
126
|
+
" if isinstance(columns, str):\n",
|
|
127
|
+
" columns = [columns]\n",
|
|
128
|
+
" for col in columns:\n",
|
|
129
|
+
" coltype = df.__getattr__(col).type\n",
|
|
130
|
+
" if isinstance(coltype, sqlalchemy.sql.sqltypes.NullType):\n",
|
|
131
|
+
" coltype = \"NullType\"\n",
|
|
132
|
+
" print(\" '{}' Column Type: {}\".format(col, coltype))"
|
|
133
|
+
]
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"cell_type": "markdown",
|
|
137
|
+
"metadata": {},
|
|
138
|
+
"source": [
|
|
139
|
+
"# Using Aggregate Functions from Teradata Vantage with SQLAlchemy"
|
|
140
|
+
]
|
|
141
|
+
},
|
|
142
|
+
{
|
|
143
|
+
"cell_type": "code",
|
|
144
|
+
"execution_count": 5,
|
|
145
|
+
"metadata": {},
|
|
146
|
+
"outputs": [],
|
|
147
|
+
"source": [
|
|
148
|
+
"# Import func from SQLAlchemy to use the same for executing aggregate functions\n",
|
|
149
|
+
"from sqlalchemy import func"
|
|
150
|
+
]
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"cell_type": "code",
|
|
154
|
+
"execution_count": 6,
|
|
155
|
+
"metadata": {},
|
|
156
|
+
"outputs": [],
|
|
157
|
+
"source": [
|
|
158
|
+
"# Before we move on with examples, one should read below just to understand how teradataml DataFrame and \n",
|
|
159
|
+
"# its columns are used to create a SQLAlchemy ClauseElement/Expression.\n",
|
|
160
|
+
"\n",
|
|
161
|
+
"# Often in below examples one would see something like this: 'admissions_train.admitted.expression'\n",
|
|
162
|
+
"# Here in the above expression,\n",
|
|
163
|
+
"# 'admissions_train' is 'teradataml DataFrame'\n",
|
|
164
|
+
"# 'admitted' is 'column name' in teradataml DataFrame 'admissions_train'\n",
|
|
165
|
+
"# Thus, \n",
|
|
166
|
+
"# 'admissions_train.admitted' together forms a ColumnExpression.\n",
|
|
167
|
+
"# expression allows us to use teradata ColumnExpression to be treated as SQLAlchemy Expression.\n",
|
|
168
|
+
"# Thus,\n",
|
|
169
|
+
"# 'admissions_train.admitted.expression' gives us an expression that can be used with SQLAlchemy clauseElements."
|
|
170
|
+
]
|
|
171
|
+
},
|
|
172
|
+
{
|
|
173
|
+
"cell_type": "markdown",
|
|
174
|
+
"metadata": {},
|
|
175
|
+
"source": [
|
|
176
|
+
"## Avg/Average/Ave Function"
|
|
177
|
+
]
|
|
178
|
+
},
|
|
179
|
+
{
|
|
180
|
+
"cell_type": "code",
|
|
181
|
+
"execution_count": 7,
|
|
182
|
+
"metadata": {},
|
|
183
|
+
"outputs": [],
|
|
184
|
+
"source": [
|
|
185
|
+
"# Function returns the arithmetic average of all values in value_expression.\n",
|
|
186
|
+
"# Syntax:\n",
|
|
187
|
+
"# Avg(value_expression)"
|
|
188
|
+
]
|
|
189
|
+
},
|
|
190
|
+
{
|
|
191
|
+
"cell_type": "code",
|
|
192
|
+
"execution_count": 8,
|
|
193
|
+
"metadata": {},
|
|
194
|
+
"outputs": [
|
|
195
|
+
{
|
|
196
|
+
"data": {
|
|
197
|
+
"text/plain": [
|
|
198
|
+
"sqlalchemy.sql.functions.Function"
|
|
199
|
+
]
|
|
200
|
+
},
|
|
201
|
+
"execution_count": 8,
|
|
202
|
+
"metadata": {},
|
|
203
|
+
"output_type": "execute_result"
|
|
204
|
+
}
|
|
205
|
+
],
|
|
206
|
+
"source": [
|
|
207
|
+
"agg_func_ = func.avg(admissions_train.gpa.expression)\n",
|
|
208
|
+
"type(agg_func_)"
|
|
209
|
+
]
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
"cell_type": "code",
|
|
213
|
+
"execution_count": 9,
|
|
214
|
+
"metadata": {},
|
|
215
|
+
"outputs": [
|
|
216
|
+
{
|
|
217
|
+
"name": "stdout",
|
|
218
|
+
"output_type": "stream",
|
|
219
|
+
"text": [
|
|
220
|
+
"Equivalent SQL: select ave(admitted) AS ave_admitted_, average(admitted) AS average_admitted_, avg(gpa) AS avg_gpa_ from \"admissions_train\"\n",
|
|
221
|
+
"\n",
|
|
222
|
+
"\n",
|
|
223
|
+
" ************************* DataFrame ********************* \n",
|
|
224
|
+
" ave_admitted_ average_admitted_ avg_gpa_\n",
|
|
225
|
+
"0 0.65 0.65 3.54175\n",
|
|
226
|
+
"\n",
|
|
227
|
+
"\n",
|
|
228
|
+
"\n",
|
|
229
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
230
|
+
"ave_admitted_ float\n",
|
|
231
|
+
"average_admitted_ float\n",
|
|
232
|
+
"avg_gpa_ float\n",
|
|
233
|
+
"\n",
|
|
234
|
+
"\n",
|
|
235
|
+
"\n",
|
|
236
|
+
" 'avg_gpa_' Column Type: FLOAT\n",
|
|
237
|
+
" 'average_admitted_' Column Type: FLOAT\n",
|
|
238
|
+
" 'ave_admitted_' Column Type: FLOAT\n"
|
|
239
|
+
]
|
|
240
|
+
}
|
|
241
|
+
],
|
|
242
|
+
"source": [
|
|
243
|
+
"df = admissions_train.assign(True, avg_gpa_=agg_func_, \n",
|
|
244
|
+
" average_admitted_=func.average(admissions_train.admitted.expression),\n",
|
|
245
|
+
" ave_admitted_=func.ave(admissions_train.admitted.expression))\n",
|
|
246
|
+
"print_variables(df, [\"avg_gpa_\", \"average_admitted_\", \"ave_admitted_\"])"
|
|
247
|
+
]
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
"cell_type": "markdown",
|
|
251
|
+
"metadata": {},
|
|
252
|
+
"source": [
|
|
253
|
+
"## CORR Function"
|
|
254
|
+
]
|
|
255
|
+
},
|
|
256
|
+
{
|
|
257
|
+
"cell_type": "code",
|
|
258
|
+
"execution_count": 10,
|
|
259
|
+
"metadata": {},
|
|
260
|
+
"outputs": [],
|
|
261
|
+
"source": [
|
|
262
|
+
"# Function returns the Sample Pearson product moment correlation coefficient of its arguments for all non-null data point pairs.\n",
|
|
263
|
+
"# Syntax:\n",
|
|
264
|
+
"# Corr(value_expression1, value_expression2)"
|
|
265
|
+
]
|
|
266
|
+
},
|
|
267
|
+
{
|
|
268
|
+
"cell_type": "code",
|
|
269
|
+
"execution_count": 11,
|
|
270
|
+
"metadata": {},
|
|
271
|
+
"outputs": [
|
|
272
|
+
{
|
|
273
|
+
"name": "stdout",
|
|
274
|
+
"output_type": "stream",
|
|
275
|
+
"text": [
|
|
276
|
+
"Equivalent SQL: select corr(admitted, gpa) AS corr_numeric_ from \"admissions_train\"\n",
|
|
277
|
+
"\n",
|
|
278
|
+
"\n",
|
|
279
|
+
" ************************* DataFrame ********************* \n",
|
|
280
|
+
" corr_numeric_\n",
|
|
281
|
+
"0 -0.022265\n",
|
|
282
|
+
"\n",
|
|
283
|
+
"\n",
|
|
284
|
+
"\n",
|
|
285
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
286
|
+
"corr_numeric_ float\n",
|
|
287
|
+
"\n",
|
|
288
|
+
"\n",
|
|
289
|
+
"\n",
|
|
290
|
+
" 'corr_numeric_' Column Type: FLOAT\n"
|
|
291
|
+
]
|
|
292
|
+
}
|
|
293
|
+
],
|
|
294
|
+
"source": [
|
|
295
|
+
"df = admissions_train.assign(True, \n",
|
|
296
|
+
" corr_numeric_=func.corr(admissions_train.admitted.expression, admissions_train.gpa.expression))\n",
|
|
297
|
+
"print_variables(df, [\"corr_numeric_\"])"
|
|
298
|
+
]
|
|
299
|
+
},
|
|
300
|
+
{
|
|
301
|
+
"cell_type": "markdown",
|
|
302
|
+
"metadata": {},
|
|
303
|
+
"source": [
|
|
304
|
+
"## Count Function"
|
|
305
|
+
]
|
|
306
|
+
},
|
|
307
|
+
{
|
|
308
|
+
"cell_type": "code",
|
|
309
|
+
"execution_count": 12,
|
|
310
|
+
"metadata": {},
|
|
311
|
+
"outputs": [],
|
|
312
|
+
"source": [
|
|
313
|
+
"# Function returns a column value that is the total number of qualified rows in value_expression.\n",
|
|
314
|
+
"# Syntax:\n",
|
|
315
|
+
"# Count(value_expression)"
|
|
316
|
+
]
|
|
317
|
+
},
|
|
318
|
+
{
|
|
319
|
+
"cell_type": "code",
|
|
320
|
+
"execution_count": 13,
|
|
321
|
+
"metadata": {},
|
|
322
|
+
"outputs": [
|
|
323
|
+
{
|
|
324
|
+
"name": "stdout",
|
|
325
|
+
"output_type": "stream",
|
|
326
|
+
"text": [
|
|
327
|
+
"Equivalent SQL: select count(admitted) AS assined_count_col_ from \"admissions_train\"\n",
|
|
328
|
+
"\n",
|
|
329
|
+
"\n",
|
|
330
|
+
" ************************* DataFrame ********************* \n",
|
|
331
|
+
" assined_count_col_\n",
|
|
332
|
+
"0 40\n",
|
|
333
|
+
"\n",
|
|
334
|
+
"\n",
|
|
335
|
+
"\n",
|
|
336
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
337
|
+
"assined_count_col_ int\n",
|
|
338
|
+
"\n",
|
|
339
|
+
"\n",
|
|
340
|
+
"\n",
|
|
341
|
+
" 'assined_count_col_' Column Type: INTEGER\n"
|
|
342
|
+
]
|
|
343
|
+
}
|
|
344
|
+
],
|
|
345
|
+
"source": [
|
|
346
|
+
"df = admissions_train.assign(True, assined_count_col_=func.count(admissions_train.admitted.expression))\n",
|
|
347
|
+
"print_variables(df, [\"assined_count_col_\"])"
|
|
348
|
+
]
|
|
349
|
+
},
|
|
350
|
+
{
|
|
351
|
+
"cell_type": "markdown",
|
|
352
|
+
"metadata": {},
|
|
353
|
+
"source": [
|
|
354
|
+
"## Covar_pop Function"
|
|
355
|
+
]
|
|
356
|
+
},
|
|
357
|
+
{
|
|
358
|
+
"cell_type": "code",
|
|
359
|
+
"execution_count": 14,
|
|
360
|
+
"metadata": {},
|
|
361
|
+
"outputs": [],
|
|
362
|
+
"source": [
|
|
363
|
+
"# Function returns the population covariance of its arguments for all non-null data point pairs.\n",
|
|
364
|
+
"# Syntax:\n",
|
|
365
|
+
"# Covar_pop(value_expression1, value_expression2)"
|
|
366
|
+
]
|
|
367
|
+
},
|
|
368
|
+
{
|
|
369
|
+
"cell_type": "code",
|
|
370
|
+
"execution_count": 15,
|
|
371
|
+
"metadata": {},
|
|
372
|
+
"outputs": [
|
|
373
|
+
{
|
|
374
|
+
"name": "stdout",
|
|
375
|
+
"output_type": "stream",
|
|
376
|
+
"text": [
|
|
377
|
+
"Equivalent SQL: select Covar_pop(admitted, gpa) AS \"assined_col_Covar_pop\" from \"admissions_train\"\n",
|
|
378
|
+
"\n",
|
|
379
|
+
"\n",
|
|
380
|
+
" ************************* DataFrame ********************* \n",
|
|
381
|
+
" assined_col_Covar_pop\n",
|
|
382
|
+
"0 -0.005387\n",
|
|
383
|
+
"\n",
|
|
384
|
+
"\n",
|
|
385
|
+
"\n",
|
|
386
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
387
|
+
"assined_col_Covar_pop float\n",
|
|
388
|
+
"\n",
|
|
389
|
+
"\n",
|
|
390
|
+
"\n",
|
|
391
|
+
" 'assined_col_Covar_pop' Column Type: FLOAT\n"
|
|
392
|
+
]
|
|
393
|
+
}
|
|
394
|
+
],
|
|
395
|
+
"source": [
|
|
396
|
+
"df = admissions_train.assign(True, \n",
|
|
397
|
+
" assined_col_Covar_pop=func.Covar_pop(admissions_train.admitted.expression, admissions_train.gpa.expression))\n",
|
|
398
|
+
"print_variables(df, [\"assined_col_Covar_pop\"])"
|
|
399
|
+
]
|
|
400
|
+
},
|
|
401
|
+
{
|
|
402
|
+
"cell_type": "markdown",
|
|
403
|
+
"metadata": {},
|
|
404
|
+
"source": [
|
|
405
|
+
"## Covar_samp Function"
|
|
406
|
+
]
|
|
407
|
+
},
|
|
408
|
+
{
|
|
409
|
+
"cell_type": "code",
|
|
410
|
+
"execution_count": 16,
|
|
411
|
+
"metadata": {},
|
|
412
|
+
"outputs": [],
|
|
413
|
+
"source": [
|
|
414
|
+
"# Function returns the sample covariance of its arguments for all non-null data point pairs.\n",
|
|
415
|
+
"# Syntax:\n",
|
|
416
|
+
"# Covar_samp(value_expression1, value_expression2)"
|
|
417
|
+
]
|
|
418
|
+
},
|
|
419
|
+
{
|
|
420
|
+
"cell_type": "code",
|
|
421
|
+
"execution_count": 17,
|
|
422
|
+
"metadata": {},
|
|
423
|
+
"outputs": [
|
|
424
|
+
{
|
|
425
|
+
"name": "stdout",
|
|
426
|
+
"output_type": "stream",
|
|
427
|
+
"text": [
|
|
428
|
+
"Equivalent SQL: select Covar_samp(admitted, gpa) AS \"assined_col_Covar_samp\" from \"admissions_train\"\n",
|
|
429
|
+
"\n",
|
|
430
|
+
"\n",
|
|
431
|
+
" ************************* DataFrame ********************* \n",
|
|
432
|
+
" assined_col_Covar_samp\n",
|
|
433
|
+
"0 -0.005526\n",
|
|
434
|
+
"\n",
|
|
435
|
+
"\n",
|
|
436
|
+
"\n",
|
|
437
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
438
|
+
"assined_col_Covar_samp float\n",
|
|
439
|
+
"\n",
|
|
440
|
+
"\n",
|
|
441
|
+
"\n",
|
|
442
|
+
" 'assined_col_Covar_samp' Column Type: FLOAT\n"
|
|
443
|
+
]
|
|
444
|
+
}
|
|
445
|
+
],
|
|
446
|
+
"source": [
|
|
447
|
+
"df = admissions_train.assign(True, \n",
|
|
448
|
+
" assined_col_Covar_samp=func.Covar_samp(admissions_train.admitted.expression, admissions_train.gpa.expression))\n",
|
|
449
|
+
"print_variables(df, [\"assined_col_Covar_samp\"])"
|
|
450
|
+
]
|
|
451
|
+
},
|
|
452
|
+
{
|
|
453
|
+
"cell_type": "markdown",
|
|
454
|
+
"metadata": {},
|
|
455
|
+
"source": [
|
|
456
|
+
"## Kurtosis Function"
|
|
457
|
+
]
|
|
458
|
+
},
|
|
459
|
+
{
|
|
460
|
+
"cell_type": "code",
|
|
461
|
+
"execution_count": 18,
|
|
462
|
+
"metadata": {},
|
|
463
|
+
"outputs": [],
|
|
464
|
+
"source": [
|
|
465
|
+
"# Function returns the kurtosis of the distribution of value_expression.\n",
|
|
466
|
+
"# Syntax:\n",
|
|
467
|
+
"# Kurtosis(value_expression)"
|
|
468
|
+
]
|
|
469
|
+
},
|
|
470
|
+
{
|
|
471
|
+
"cell_type": "code",
|
|
472
|
+
"execution_count": 19,
|
|
473
|
+
"metadata": {},
|
|
474
|
+
"outputs": [
|
|
475
|
+
{
|
|
476
|
+
"name": "stdout",
|
|
477
|
+
"output_type": "stream",
|
|
478
|
+
"text": [
|
|
479
|
+
"Equivalent SQL: select Kurtosis(gpa) AS \"assined_col_Kurtosis_num\" from \"admissions_train\"\n",
|
|
480
|
+
"\n",
|
|
481
|
+
"\n",
|
|
482
|
+
" ************************* DataFrame ********************* \n",
|
|
483
|
+
" assined_col_Kurtosis_num\n",
|
|
484
|
+
"0 4.052659\n",
|
|
485
|
+
"\n",
|
|
486
|
+
"\n",
|
|
487
|
+
"\n",
|
|
488
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
489
|
+
"assined_col_Kurtosis_num float\n",
|
|
490
|
+
"\n",
|
|
491
|
+
"\n",
|
|
492
|
+
"\n",
|
|
493
|
+
" 'assined_col_Kurtosis_num' Column Type: FLOAT\n"
|
|
494
|
+
]
|
|
495
|
+
}
|
|
496
|
+
],
|
|
497
|
+
"source": [
|
|
498
|
+
"df = admissions_train.assign(True, assined_col_Kurtosis_num=func.Kurtosis(admissions_train.gpa.expression))\n",
|
|
499
|
+
"print_variables(df, [\"assined_col_Kurtosis_num\"])"
|
|
500
|
+
]
|
|
501
|
+
},
|
|
502
|
+
{
|
|
503
|
+
"cell_type": "markdown",
|
|
504
|
+
"metadata": {},
|
|
505
|
+
"source": [
|
|
506
|
+
"## max/maximum Function"
|
|
507
|
+
]
|
|
508
|
+
},
|
|
509
|
+
{
|
|
510
|
+
"cell_type": "code",
|
|
511
|
+
"execution_count": 20,
|
|
512
|
+
"metadata": {},
|
|
513
|
+
"outputs": [],
|
|
514
|
+
"source": [
|
|
515
|
+
"# Function returns a column value that is the maximum value for value_expression.\n",
|
|
516
|
+
"# Syntax:\n",
|
|
517
|
+
"# max(value_expression)"
|
|
518
|
+
]
|
|
519
|
+
},
|
|
520
|
+
{
|
|
521
|
+
"cell_type": "code",
|
|
522
|
+
"execution_count": 21,
|
|
523
|
+
"metadata": {},
|
|
524
|
+
"outputs": [
|
|
525
|
+
{
|
|
526
|
+
"name": "stdout",
|
|
527
|
+
"output_type": "stream",
|
|
528
|
+
"text": [
|
|
529
|
+
"Equivalent SQL: select max(gpa) AS assined_col_max, maximum(stats) AS assined_col_maximum from \"admissions_train\"\n",
|
|
530
|
+
"\n",
|
|
531
|
+
"\n",
|
|
532
|
+
" ************************* DataFrame ********************* \n",
|
|
533
|
+
" assined_col_max assined_col_maximum\n",
|
|
534
|
+
"0 4.0 Novice\n",
|
|
535
|
+
"\n",
|
|
536
|
+
"\n",
|
|
537
|
+
"\n",
|
|
538
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
539
|
+
"assined_col_max float\n",
|
|
540
|
+
"assined_col_maximum str\n",
|
|
541
|
+
"\n",
|
|
542
|
+
"\n",
|
|
543
|
+
"\n",
|
|
544
|
+
" 'assined_col_maximum' Column Type: VARCHAR\n",
|
|
545
|
+
" 'assined_col_max' Column Type: FLOAT\n"
|
|
546
|
+
]
|
|
547
|
+
}
|
|
548
|
+
],
|
|
549
|
+
"source": [
|
|
550
|
+
"df = admissions_train.assign(True, \n",
|
|
551
|
+
" assined_col_max=func.max(admissions_train.gpa.expression),\n",
|
|
552
|
+
" assined_col_maximum=func.maximum(admissions_train.stats.expression))\n",
|
|
553
|
+
"print_variables(df, [\"assined_col_maximum\", \"assined_col_max\"])"
|
|
554
|
+
]
|
|
555
|
+
},
|
|
556
|
+
{
|
|
557
|
+
"cell_type": "markdown",
|
|
558
|
+
"metadata": {},
|
|
559
|
+
"source": [
|
|
560
|
+
"## min/minimum Function"
|
|
561
|
+
]
|
|
562
|
+
},
|
|
563
|
+
{
|
|
564
|
+
"cell_type": "code",
|
|
565
|
+
"execution_count": 22,
|
|
566
|
+
"metadata": {},
|
|
567
|
+
"outputs": [],
|
|
568
|
+
"source": [
|
|
569
|
+
"# Function returns a column value that is the minimum value for value_expression.\n",
|
|
570
|
+
"# Syntax:\n",
|
|
571
|
+
"# min(value_expression)"
|
|
572
|
+
]
|
|
573
|
+
},
|
|
574
|
+
{
|
|
575
|
+
"cell_type": "code",
|
|
576
|
+
"execution_count": 23,
|
|
577
|
+
"metadata": {},
|
|
578
|
+
"outputs": [
|
|
579
|
+
{
|
|
580
|
+
"name": "stdout",
|
|
581
|
+
"output_type": "stream",
|
|
582
|
+
"text": [
|
|
583
|
+
"Equivalent SQL: select min(gpa) AS assined_col_min, minimum(stats) AS assined_col_minimum from \"admissions_train\"\n",
|
|
584
|
+
"\n",
|
|
585
|
+
"\n",
|
|
586
|
+
" ************************* DataFrame ********************* \n",
|
|
587
|
+
" assined_col_min assined_col_minimum\n",
|
|
588
|
+
"0 1.87 Advanced\n",
|
|
589
|
+
"\n",
|
|
590
|
+
"\n",
|
|
591
|
+
"\n",
|
|
592
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
593
|
+
"assined_col_min float\n",
|
|
594
|
+
"assined_col_minimum str\n",
|
|
595
|
+
"\n",
|
|
596
|
+
"\n",
|
|
597
|
+
"\n",
|
|
598
|
+
" 'assined_col_min' Column Type: FLOAT\n",
|
|
599
|
+
" 'assined_col_minimum' Column Type: VARCHAR\n"
|
|
600
|
+
]
|
|
601
|
+
}
|
|
602
|
+
],
|
|
603
|
+
"source": [
|
|
604
|
+
"df = admissions_train.assign(True, \n",
|
|
605
|
+
" assined_col_min=func.min(admissions_train.gpa.expression),\n",
|
|
606
|
+
" assined_col_minimum=func.minimum(admissions_train.stats.expression))\n",
|
|
607
|
+
"print_variables(df, [\"assined_col_min\", \"assined_col_minimum\"])"
|
|
608
|
+
]
|
|
609
|
+
},
|
|
610
|
+
{
|
|
611
|
+
"cell_type": "markdown",
|
|
612
|
+
"metadata": {},
|
|
613
|
+
"source": [
|
|
614
|
+
"## REGR_AVGX Function"
|
|
615
|
+
]
|
|
616
|
+
},
|
|
617
|
+
{
|
|
618
|
+
"cell_type": "code",
|
|
619
|
+
"execution_count": 24,
|
|
620
|
+
"metadata": {},
|
|
621
|
+
"outputs": [],
|
|
622
|
+
"source": [
|
|
623
|
+
"# Function returns the mean of the independent_variable_expression for all non-null data pairs of the \n",
|
|
624
|
+
"# dependent and independent variable arguments.\n",
|
|
625
|
+
"# Syntax:\n",
|
|
626
|
+
"# REGR_AVGX(dependent_value_expression, independent_value_expression)"
|
|
627
|
+
]
|
|
628
|
+
},
|
|
629
|
+
{
|
|
630
|
+
"cell_type": "code",
|
|
631
|
+
"execution_count": 25,
|
|
632
|
+
"metadata": {},
|
|
633
|
+
"outputs": [
|
|
634
|
+
{
|
|
635
|
+
"name": "stdout",
|
|
636
|
+
"output_type": "stream",
|
|
637
|
+
"text": [
|
|
638
|
+
"Equivalent SQL: select regr_avgx(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
639
|
+
"\n",
|
|
640
|
+
"\n",
|
|
641
|
+
" ************************* DataFrame ********************* \n",
|
|
642
|
+
" assined_col_\n",
|
|
643
|
+
"0 3.54175\n",
|
|
644
|
+
"\n",
|
|
645
|
+
"\n",
|
|
646
|
+
"\n",
|
|
647
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
648
|
+
"assined_col_ float\n",
|
|
649
|
+
"\n",
|
|
650
|
+
"\n",
|
|
651
|
+
"\n",
|
|
652
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
653
|
+
]
|
|
654
|
+
}
|
|
655
|
+
],
|
|
656
|
+
"source": [
|
|
657
|
+
"df = admissions_train.assign(True, \n",
|
|
658
|
+
" assined_col_=func.regr_avgx(admissions_train.admitted.expression, \n",
|
|
659
|
+
" admissions_train.gpa.expression))\n",
|
|
660
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
661
|
+
]
|
|
662
|
+
},
|
|
663
|
+
{
|
|
664
|
+
"cell_type": "markdown",
|
|
665
|
+
"metadata": {},
|
|
666
|
+
"source": [
|
|
667
|
+
"## REGR_AVGY Function"
|
|
668
|
+
]
|
|
669
|
+
},
|
|
670
|
+
{
|
|
671
|
+
"cell_type": "code",
|
|
672
|
+
"execution_count": 26,
|
|
673
|
+
"metadata": {},
|
|
674
|
+
"outputs": [],
|
|
675
|
+
"source": [
|
|
676
|
+
"# Function returns the mean of the dependent_variable_expression for all non-null data pairs of the \n",
|
|
677
|
+
"# dependent and independent variable arguments.\n",
|
|
678
|
+
"# Syntax:\n",
|
|
679
|
+
"# REGR_AVGY(dependent_value_expression, independent_value_expression)"
|
|
680
|
+
]
|
|
681
|
+
},
|
|
682
|
+
{
|
|
683
|
+
"cell_type": "code",
|
|
684
|
+
"execution_count": 27,
|
|
685
|
+
"metadata": {},
|
|
686
|
+
"outputs": [
|
|
687
|
+
{
|
|
688
|
+
"name": "stdout",
|
|
689
|
+
"output_type": "stream",
|
|
690
|
+
"text": [
|
|
691
|
+
"Equivalent SQL: select regr_avgy(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
692
|
+
"\n",
|
|
693
|
+
"\n",
|
|
694
|
+
" ************************* DataFrame ********************* \n",
|
|
695
|
+
" assined_col_\n",
|
|
696
|
+
"0 0.65\n",
|
|
697
|
+
"\n",
|
|
698
|
+
"\n",
|
|
699
|
+
"\n",
|
|
700
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
701
|
+
"assined_col_ float\n",
|
|
702
|
+
"\n",
|
|
703
|
+
"\n",
|
|
704
|
+
"\n",
|
|
705
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
706
|
+
]
|
|
707
|
+
}
|
|
708
|
+
],
|
|
709
|
+
"source": [
|
|
710
|
+
"df = admissions_train.assign(True, \n",
|
|
711
|
+
" assined_col_=func.regr_avgy(admissions_train.admitted.expression, \n",
|
|
712
|
+
" admissions_train.gpa.expression))\n",
|
|
713
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
714
|
+
]
|
|
715
|
+
},
|
|
716
|
+
{
|
|
717
|
+
"cell_type": "markdown",
|
|
718
|
+
"metadata": {},
|
|
719
|
+
"source": [
|
|
720
|
+
"## REGR_Count Function"
|
|
721
|
+
]
|
|
722
|
+
},
|
|
723
|
+
{
|
|
724
|
+
"cell_type": "code",
|
|
725
|
+
"execution_count": 28,
|
|
726
|
+
"metadata": {},
|
|
727
|
+
"outputs": [],
|
|
728
|
+
"source": [
|
|
729
|
+
"# Function returns the count of all non-null data pairs of the dependent and independent variable arguments.\n",
|
|
730
|
+
"# Syntax:\n",
|
|
731
|
+
"# REGR_count(dependent_value_expression, independent_value_expression)"
|
|
732
|
+
]
|
|
733
|
+
},
|
|
734
|
+
{
|
|
735
|
+
"cell_type": "code",
|
|
736
|
+
"execution_count": 29,
|
|
737
|
+
"metadata": {},
|
|
738
|
+
"outputs": [
|
|
739
|
+
{
|
|
740
|
+
"name": "stdout",
|
|
741
|
+
"output_type": "stream",
|
|
742
|
+
"text": [
|
|
743
|
+
"Equivalent SQL: select REGR_count(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
744
|
+
"\n",
|
|
745
|
+
"\n",
|
|
746
|
+
" ************************* DataFrame ********************* \n",
|
|
747
|
+
" assined_col_\n",
|
|
748
|
+
"0 40\n",
|
|
749
|
+
"\n",
|
|
750
|
+
"\n",
|
|
751
|
+
"\n",
|
|
752
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
753
|
+
"assined_col_ int\n",
|
|
754
|
+
"\n",
|
|
755
|
+
"\n",
|
|
756
|
+
"\n",
|
|
757
|
+
" 'assined_col_' Column Type: INTEGER\n"
|
|
758
|
+
]
|
|
759
|
+
}
|
|
760
|
+
],
|
|
761
|
+
"source": [
|
|
762
|
+
"df = admissions_train.assign(True, \n",
|
|
763
|
+
" assined_col_=func.REGR_count(admissions_train.admitted.expression, \n",
|
|
764
|
+
" admissions_train.gpa.expression))\n",
|
|
765
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
766
|
+
]
|
|
767
|
+
},
|
|
768
|
+
{
|
|
769
|
+
"cell_type": "markdown",
|
|
770
|
+
"metadata": {},
|
|
771
|
+
"source": [
|
|
772
|
+
"## REGR_Intercept Function"
|
|
773
|
+
]
|
|
774
|
+
},
|
|
775
|
+
{
|
|
776
|
+
"cell_type": "code",
|
|
777
|
+
"execution_count": 30,
|
|
778
|
+
"metadata": {},
|
|
779
|
+
"outputs": [],
|
|
780
|
+
"source": [
|
|
781
|
+
"# Function returns the intercept of the univariate linear regression line through all non-null data pairs of the \n",
|
|
782
|
+
"# dependent and independent variable arguments.\n",
|
|
783
|
+
"# Syntax:\n",
|
|
784
|
+
"# REGR_Intercept(dependent_value_expression, independent_value_expression)"
|
|
785
|
+
]
|
|
786
|
+
},
|
|
787
|
+
{
|
|
788
|
+
"cell_type": "code",
|
|
789
|
+
"execution_count": 31,
|
|
790
|
+
"metadata": {},
|
|
791
|
+
"outputs": [
|
|
792
|
+
{
|
|
793
|
+
"name": "stdout",
|
|
794
|
+
"output_type": "stream",
|
|
795
|
+
"text": [
|
|
796
|
+
"Equivalent SQL: select REGR_Intercept(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
797
|
+
"\n",
|
|
798
|
+
"\n",
|
|
799
|
+
" ************************* DataFrame ********************* \n",
|
|
800
|
+
" assined_col_\n",
|
|
801
|
+
"0 0.724144\n",
|
|
802
|
+
"\n",
|
|
803
|
+
"\n",
|
|
804
|
+
"\n",
|
|
805
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
806
|
+
"assined_col_ float\n",
|
|
807
|
+
"\n",
|
|
808
|
+
"\n",
|
|
809
|
+
"\n",
|
|
810
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
811
|
+
]
|
|
812
|
+
}
|
|
813
|
+
],
|
|
814
|
+
"source": [
|
|
815
|
+
"df = admissions_train.assign(True, \n",
|
|
816
|
+
" assined_col_=func.REGR_Intercept(admissions_train.admitted.expression, \n",
|
|
817
|
+
" admissions_train.gpa.expression))\n",
|
|
818
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
819
|
+
]
|
|
820
|
+
},
|
|
821
|
+
{
|
|
822
|
+
"cell_type": "markdown",
|
|
823
|
+
"metadata": {},
|
|
824
|
+
"source": [
|
|
825
|
+
"## REGR_R2 Function"
|
|
826
|
+
]
|
|
827
|
+
},
|
|
828
|
+
{
|
|
829
|
+
"cell_type": "code",
|
|
830
|
+
"execution_count": 32,
|
|
831
|
+
"metadata": {},
|
|
832
|
+
"outputs": [],
|
|
833
|
+
"source": [
|
|
834
|
+
"# Function returns the coefficient of determination for all non-null data pairs of the dependent and independent \n",
|
|
835
|
+
"# variable arguments.\n",
|
|
836
|
+
"# Syntax:\n",
|
|
837
|
+
"# REGR_R2(dependent_value_expression, independent_value_expression)"
|
|
838
|
+
]
|
|
839
|
+
},
|
|
840
|
+
{
|
|
841
|
+
"cell_type": "code",
|
|
842
|
+
"execution_count": 33,
|
|
843
|
+
"metadata": {},
|
|
844
|
+
"outputs": [
|
|
845
|
+
{
|
|
846
|
+
"name": "stdout",
|
|
847
|
+
"output_type": "stream",
|
|
848
|
+
"text": [
|
|
849
|
+
"Equivalent SQL: select REGR_R2(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
850
|
+
"\n",
|
|
851
|
+
"\n",
|
|
852
|
+
" ************************* DataFrame ********************* \n",
|
|
853
|
+
" assined_col_\n",
|
|
854
|
+
"0 0.000496\n",
|
|
855
|
+
"\n",
|
|
856
|
+
"\n",
|
|
857
|
+
"\n",
|
|
858
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
859
|
+
"assined_col_ float\n",
|
|
860
|
+
"\n",
|
|
861
|
+
"\n",
|
|
862
|
+
"\n",
|
|
863
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
864
|
+
]
|
|
865
|
+
}
|
|
866
|
+
],
|
|
867
|
+
"source": [
|
|
868
|
+
"df = admissions_train.assign(True, \n",
|
|
869
|
+
" assined_col_=func.REGR_R2(admissions_train.admitted.expression, \n",
|
|
870
|
+
" admissions_train.gpa.expression))\n",
|
|
871
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
872
|
+
]
|
|
873
|
+
},
|
|
874
|
+
{
|
|
875
|
+
"cell_type": "markdown",
|
|
876
|
+
"metadata": {},
|
|
877
|
+
"source": [
|
|
878
|
+
"## REGR_SLOPE Function"
|
|
879
|
+
]
|
|
880
|
+
},
|
|
881
|
+
{
|
|
882
|
+
"cell_type": "code",
|
|
883
|
+
"execution_count": 34,
|
|
884
|
+
"metadata": {},
|
|
885
|
+
"outputs": [],
|
|
886
|
+
"source": [
|
|
887
|
+
"# Function returns the slope of the univariate linear regression line through all non-null data pairs of the \n",
|
|
888
|
+
"# dependent and independent variable arguments.\n",
|
|
889
|
+
"# Syntax:\n",
|
|
890
|
+
"# REGR_SLOPE(dependent_value_expression, independent_value_expression)"
|
|
891
|
+
]
|
|
892
|
+
},
|
|
893
|
+
{
|
|
894
|
+
"cell_type": "code",
|
|
895
|
+
"execution_count": 35,
|
|
896
|
+
"metadata": {},
|
|
897
|
+
"outputs": [
|
|
898
|
+
{
|
|
899
|
+
"name": "stdout",
|
|
900
|
+
"output_type": "stream",
|
|
901
|
+
"text": [
|
|
902
|
+
"Equivalent SQL: select REGR_SLOPE(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
903
|
+
"\n",
|
|
904
|
+
"\n",
|
|
905
|
+
" ************************* DataFrame ********************* \n",
|
|
906
|
+
" assined_col_\n",
|
|
907
|
+
"0 -0.020934\n",
|
|
908
|
+
"\n",
|
|
909
|
+
"\n",
|
|
910
|
+
"\n",
|
|
911
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
912
|
+
"assined_col_ float\n",
|
|
913
|
+
"\n",
|
|
914
|
+
"\n",
|
|
915
|
+
"\n",
|
|
916
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
917
|
+
]
|
|
918
|
+
}
|
|
919
|
+
],
|
|
920
|
+
"source": [
|
|
921
|
+
"df = admissions_train.assign(True, \n",
|
|
922
|
+
" assined_col_=func.REGR_SLOPE(admissions_train.admitted.expression, \n",
|
|
923
|
+
" admissions_train.gpa.expression))\n",
|
|
924
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
925
|
+
]
|
|
926
|
+
},
|
|
927
|
+
{
|
|
928
|
+
"cell_type": "markdown",
|
|
929
|
+
"metadata": {},
|
|
930
|
+
"source": [
|
|
931
|
+
"## REGR_SXX Function"
|
|
932
|
+
]
|
|
933
|
+
},
|
|
934
|
+
{
|
|
935
|
+
"cell_type": "code",
|
|
936
|
+
"execution_count": 36,
|
|
937
|
+
"metadata": {},
|
|
938
|
+
"outputs": [],
|
|
939
|
+
"source": [
|
|
940
|
+
"# Function returns the sum of the squares of the independent_variable_expression for all non-null data pairs of the \n",
|
|
941
|
+
"# dependent and independent variable arguments.\n",
|
|
942
|
+
"# Syntax:\n",
|
|
943
|
+
"# REGR_SXX(dependent_value_expression, independent_value_expression)"
|
|
944
|
+
]
|
|
945
|
+
},
|
|
946
|
+
{
|
|
947
|
+
"cell_type": "code",
|
|
948
|
+
"execution_count": 37,
|
|
949
|
+
"metadata": {},
|
|
950
|
+
"outputs": [
|
|
951
|
+
{
|
|
952
|
+
"name": "stdout",
|
|
953
|
+
"output_type": "stream",
|
|
954
|
+
"text": [
|
|
955
|
+
"Equivalent SQL: select REGR_SXX(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
956
|
+
"\n",
|
|
957
|
+
"\n",
|
|
958
|
+
" ************************* DataFrame ********************* \n",
|
|
959
|
+
" assined_col_\n",
|
|
960
|
+
"0 10.294177\n",
|
|
961
|
+
"\n",
|
|
962
|
+
"\n",
|
|
963
|
+
"\n",
|
|
964
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
965
|
+
"assined_col_ float\n",
|
|
966
|
+
"\n",
|
|
967
|
+
"\n",
|
|
968
|
+
"\n",
|
|
969
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
970
|
+
]
|
|
971
|
+
}
|
|
972
|
+
],
|
|
973
|
+
"source": [
|
|
974
|
+
"df = admissions_train.assign(True, \n",
|
|
975
|
+
" assined_col_=func.REGR_SXX(admissions_train.admitted.expression, \n",
|
|
976
|
+
" admissions_train.gpa.expression))\n",
|
|
977
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
978
|
+
]
|
|
979
|
+
},
|
|
980
|
+
{
|
|
981
|
+
"cell_type": "markdown",
|
|
982
|
+
"metadata": {},
|
|
983
|
+
"source": [
|
|
984
|
+
"## REGR_SXY Function"
|
|
985
|
+
]
|
|
986
|
+
},
|
|
987
|
+
{
|
|
988
|
+
"cell_type": "code",
|
|
989
|
+
"execution_count": 38,
|
|
990
|
+
"metadata": {},
|
|
991
|
+
"outputs": [],
|
|
992
|
+
"source": [
|
|
993
|
+
"# Function returns the sum of the products of the independent_variable_expression and the dependent_variable_expression \n",
|
|
994
|
+
"# for all non-null data pairs of the dependent and independent variable arguments.\n",
|
|
995
|
+
"# Syntax:\n",
|
|
996
|
+
"# REGR_SXY(dependent_value_expression, independent_value_expression)"
|
|
997
|
+
]
|
|
998
|
+
},
|
|
999
|
+
{
|
|
1000
|
+
"cell_type": "code",
|
|
1001
|
+
"execution_count": 39,
|
|
1002
|
+
"metadata": {},
|
|
1003
|
+
"outputs": [
|
|
1004
|
+
{
|
|
1005
|
+
"name": "stdout",
|
|
1006
|
+
"output_type": "stream",
|
|
1007
|
+
"text": [
|
|
1008
|
+
"Equivalent SQL: select REGR_SXY(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1009
|
+
"\n",
|
|
1010
|
+
"\n",
|
|
1011
|
+
" ************************* DataFrame ********************* \n",
|
|
1012
|
+
" assined_col_\n",
|
|
1013
|
+
"0 -0.2155\n",
|
|
1014
|
+
"\n",
|
|
1015
|
+
"\n",
|
|
1016
|
+
"\n",
|
|
1017
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1018
|
+
"assined_col_ float\n",
|
|
1019
|
+
"\n",
|
|
1020
|
+
"\n",
|
|
1021
|
+
"\n",
|
|
1022
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
1023
|
+
]
|
|
1024
|
+
}
|
|
1025
|
+
],
|
|
1026
|
+
"source": [
|
|
1027
|
+
"df = admissions_train.assign(True, \n",
|
|
1028
|
+
" assined_col_=func.REGR_SXY(admissions_train.admitted.expression, \n",
|
|
1029
|
+
" admissions_train.gpa.expression))\n",
|
|
1030
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
1031
|
+
]
|
|
1032
|
+
},
|
|
1033
|
+
{
|
|
1034
|
+
"cell_type": "markdown",
|
|
1035
|
+
"metadata": {},
|
|
1036
|
+
"source": [
|
|
1037
|
+
"## REGR_SYY Function"
|
|
1038
|
+
]
|
|
1039
|
+
},
|
|
1040
|
+
{
|
|
1041
|
+
"cell_type": "code",
|
|
1042
|
+
"execution_count": 40,
|
|
1043
|
+
"metadata": {},
|
|
1044
|
+
"outputs": [],
|
|
1045
|
+
"source": [
|
|
1046
|
+
"# Function returns the sum of the squares of the dependent_variable_expression for all non-null data pairs of the \n",
|
|
1047
|
+
"# dependent and independent variable arguments.\n",
|
|
1048
|
+
"# Syntax:\n",
|
|
1049
|
+
"# REGR_SYY(dependent_value_expression, independent_value_expression)"
|
|
1050
|
+
]
|
|
1051
|
+
},
|
|
1052
|
+
{
|
|
1053
|
+
"cell_type": "code",
|
|
1054
|
+
"execution_count": 41,
|
|
1055
|
+
"metadata": {},
|
|
1056
|
+
"outputs": [
|
|
1057
|
+
{
|
|
1058
|
+
"name": "stdout",
|
|
1059
|
+
"output_type": "stream",
|
|
1060
|
+
"text": [
|
|
1061
|
+
"Equivalent SQL: select REGR_SYY(admitted, gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1062
|
+
"\n",
|
|
1063
|
+
"\n",
|
|
1064
|
+
" ************************* DataFrame ********************* \n",
|
|
1065
|
+
" assined_col_\n",
|
|
1066
|
+
"0 9.1\n",
|
|
1067
|
+
"\n",
|
|
1068
|
+
"\n",
|
|
1069
|
+
"\n",
|
|
1070
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1071
|
+
"assined_col_ float\n",
|
|
1072
|
+
"\n",
|
|
1073
|
+
"\n",
|
|
1074
|
+
"\n",
|
|
1075
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
1076
|
+
]
|
|
1077
|
+
}
|
|
1078
|
+
],
|
|
1079
|
+
"source": [
|
|
1080
|
+
"df = admissions_train.assign(True, \n",
|
|
1081
|
+
" assined_col_=func.REGR_SYY(admissions_train.admitted.expression, \n",
|
|
1082
|
+
" admissions_train.gpa.expression))\n",
|
|
1083
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
1084
|
+
]
|
|
1085
|
+
},
|
|
1086
|
+
{
|
|
1087
|
+
"cell_type": "markdown",
|
|
1088
|
+
"metadata": {},
|
|
1089
|
+
"source": [
|
|
1090
|
+
"## Skew Function"
|
|
1091
|
+
]
|
|
1092
|
+
},
|
|
1093
|
+
{
|
|
1094
|
+
"cell_type": "code",
|
|
1095
|
+
"execution_count": 42,
|
|
1096
|
+
"metadata": {},
|
|
1097
|
+
"outputs": [],
|
|
1098
|
+
"source": [
|
|
1099
|
+
"# Function returns the skewness of the distribution of value_expression.\n",
|
|
1100
|
+
"# Syntax:\n",
|
|
1101
|
+
"# skew(value_expression)"
|
|
1102
|
+
]
|
|
1103
|
+
},
|
|
1104
|
+
{
|
|
1105
|
+
"cell_type": "code",
|
|
1106
|
+
"execution_count": 43,
|
|
1107
|
+
"metadata": {},
|
|
1108
|
+
"outputs": [
|
|
1109
|
+
{
|
|
1110
|
+
"name": "stdout",
|
|
1111
|
+
"output_type": "stream",
|
|
1112
|
+
"text": [
|
|
1113
|
+
"Equivalent SQL: select skew(gpa) AS assined_col_float, skew(admitted) AS assined_col_int from \"admissions_train\"\n",
|
|
1114
|
+
"\n",
|
|
1115
|
+
"\n",
|
|
1116
|
+
" ************************* DataFrame ********************* \n",
|
|
1117
|
+
" assined_col_float assined_col_int\n",
|
|
1118
|
+
"0 -2.058969 -0.653746\n",
|
|
1119
|
+
"\n",
|
|
1120
|
+
"\n",
|
|
1121
|
+
"\n",
|
|
1122
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1123
|
+
"assined_col_float float\n",
|
|
1124
|
+
"assined_col_int float\n",
|
|
1125
|
+
"\n",
|
|
1126
|
+
"\n",
|
|
1127
|
+
"\n",
|
|
1128
|
+
" 'assined_col_int' Column Type: FLOAT\n",
|
|
1129
|
+
" 'assined_col_float' Column Type: FLOAT\n"
|
|
1130
|
+
]
|
|
1131
|
+
}
|
|
1132
|
+
],
|
|
1133
|
+
"source": [
|
|
1134
|
+
"df = admissions_train.assign(True, assined_col_int=func.skew(admissions_train.admitted.expression),\n",
|
|
1135
|
+
" assined_col_float=func.skew(admissions_train.gpa.expression))\n",
|
|
1136
|
+
"print_variables(df, [\"assined_col_int\", \"assined_col_float\"])"
|
|
1137
|
+
]
|
|
1138
|
+
},
|
|
1139
|
+
{
|
|
1140
|
+
"cell_type": "markdown",
|
|
1141
|
+
"metadata": {},
|
|
1142
|
+
"source": [
|
|
1143
|
+
"## stddev_pop Function"
|
|
1144
|
+
]
|
|
1145
|
+
},
|
|
1146
|
+
{
|
|
1147
|
+
"cell_type": "code",
|
|
1148
|
+
"execution_count": 44,
|
|
1149
|
+
"metadata": {},
|
|
1150
|
+
"outputs": [],
|
|
1151
|
+
"source": [
|
|
1152
|
+
"# Function returns the population standard deviation for the non-null data points in value_expression.\n",
|
|
1153
|
+
"# Syntax:\n",
|
|
1154
|
+
"# stddev_pop(value_expression)"
|
|
1155
|
+
]
|
|
1156
|
+
},
|
|
1157
|
+
{
|
|
1158
|
+
"cell_type": "code",
|
|
1159
|
+
"execution_count": 45,
|
|
1160
|
+
"metadata": {},
|
|
1161
|
+
"outputs": [
|
|
1162
|
+
{
|
|
1163
|
+
"name": "stdout",
|
|
1164
|
+
"output_type": "stream",
|
|
1165
|
+
"text": [
|
|
1166
|
+
"Equivalent SQL: select stddev_pop(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1167
|
+
"\n",
|
|
1168
|
+
"\n",
|
|
1169
|
+
" ************************* DataFrame ********************* \n",
|
|
1170
|
+
" assined_col_\n",
|
|
1171
|
+
"0 0.507301\n",
|
|
1172
|
+
"\n",
|
|
1173
|
+
"\n",
|
|
1174
|
+
"\n",
|
|
1175
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1176
|
+
"assined_col_ float\n",
|
|
1177
|
+
"\n",
|
|
1178
|
+
"\n",
|
|
1179
|
+
"\n",
|
|
1180
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
1181
|
+
]
|
|
1182
|
+
}
|
|
1183
|
+
],
|
|
1184
|
+
"source": [
|
|
1185
|
+
"df = admissions_train.assign(True, assined_col_=func.stddev_pop(admissions_train.gpa.expression))\n",
|
|
1186
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
1187
|
+
]
|
|
1188
|
+
},
|
|
1189
|
+
{
|
|
1190
|
+
"cell_type": "markdown",
|
|
1191
|
+
"metadata": {},
|
|
1192
|
+
"source": [
|
|
1193
|
+
"## stddev_samp Function"
|
|
1194
|
+
]
|
|
1195
|
+
},
|
|
1196
|
+
{
|
|
1197
|
+
"cell_type": "code",
|
|
1198
|
+
"execution_count": 46,
|
|
1199
|
+
"metadata": {},
|
|
1200
|
+
"outputs": [],
|
|
1201
|
+
"source": [
|
|
1202
|
+
"# Function returns the sample standard deviation for the non-null data points in value_expression.\n",
|
|
1203
|
+
"# Syntax:\n",
|
|
1204
|
+
"# stddev_samp(value_expression)"
|
|
1205
|
+
]
|
|
1206
|
+
},
|
|
1207
|
+
{
|
|
1208
|
+
"cell_type": "code",
|
|
1209
|
+
"execution_count": 47,
|
|
1210
|
+
"metadata": {},
|
|
1211
|
+
"outputs": [
|
|
1212
|
+
{
|
|
1213
|
+
"name": "stdout",
|
|
1214
|
+
"output_type": "stream",
|
|
1215
|
+
"text": [
|
|
1216
|
+
"Equivalent SQL: select stddev_samp(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1217
|
+
"\n",
|
|
1218
|
+
"\n",
|
|
1219
|
+
" ************************* DataFrame ********************* \n",
|
|
1220
|
+
" assined_col_\n",
|
|
1221
|
+
"0 0.513764\n",
|
|
1222
|
+
"\n",
|
|
1223
|
+
"\n",
|
|
1224
|
+
"\n",
|
|
1225
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1226
|
+
"assined_col_ float\n",
|
|
1227
|
+
"\n",
|
|
1228
|
+
"\n",
|
|
1229
|
+
"\n",
|
|
1230
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
1231
|
+
]
|
|
1232
|
+
}
|
|
1233
|
+
],
|
|
1234
|
+
"source": [
|
|
1235
|
+
"df = admissions_train.assign(True, assined_col_=func.stddev_samp(admissions_train.gpa.expression))\n",
|
|
1236
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
1237
|
+
]
|
|
1238
|
+
},
|
|
1239
|
+
{
|
|
1240
|
+
"cell_type": "markdown",
|
|
1241
|
+
"metadata": {},
|
|
1242
|
+
"source": [
|
|
1243
|
+
"## sum Function"
|
|
1244
|
+
]
|
|
1245
|
+
},
|
|
1246
|
+
{
|
|
1247
|
+
"cell_type": "code",
|
|
1248
|
+
"execution_count": 48,
|
|
1249
|
+
"metadata": {},
|
|
1250
|
+
"outputs": [],
|
|
1251
|
+
"source": [
|
|
1252
|
+
"# Function returns a column value that is the arithmetic sum of value_expression.\n",
|
|
1253
|
+
"# Syntax:\n",
|
|
1254
|
+
"# sum(value_expression)"
|
|
1255
|
+
]
|
|
1256
|
+
},
|
|
1257
|
+
{
|
|
1258
|
+
"cell_type": "code",
|
|
1259
|
+
"execution_count": 49,
|
|
1260
|
+
"metadata": {},
|
|
1261
|
+
"outputs": [
|
|
1262
|
+
{
|
|
1263
|
+
"name": "stdout",
|
|
1264
|
+
"output_type": "stream",
|
|
1265
|
+
"text": [
|
|
1266
|
+
"Equivalent SQL: select sum(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1267
|
+
"\n",
|
|
1268
|
+
"\n",
|
|
1269
|
+
" ************************* DataFrame ********************* \n",
|
|
1270
|
+
" assined_col_\n",
|
|
1271
|
+
"0 141.67\n",
|
|
1272
|
+
"\n",
|
|
1273
|
+
"\n",
|
|
1274
|
+
"\n",
|
|
1275
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1276
|
+
"assined_col_ float\n",
|
|
1277
|
+
"\n",
|
|
1278
|
+
"\n",
|
|
1279
|
+
"\n",
|
|
1280
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
1281
|
+
]
|
|
1282
|
+
}
|
|
1283
|
+
],
|
|
1284
|
+
"source": [
|
|
1285
|
+
"df = admissions_train.assign(True, assined_col_=func.sum(admissions_train.gpa.expression))\n",
|
|
1286
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
1287
|
+
]
|
|
1288
|
+
},
|
|
1289
|
+
{
|
|
1290
|
+
"cell_type": "markdown",
|
|
1291
|
+
"metadata": {},
|
|
1292
|
+
"source": [
|
|
1293
|
+
"## var_pop Function"
|
|
1294
|
+
]
|
|
1295
|
+
},
|
|
1296
|
+
{
|
|
1297
|
+
"cell_type": "code",
|
|
1298
|
+
"execution_count": 50,
|
|
1299
|
+
"metadata": {},
|
|
1300
|
+
"outputs": [],
|
|
1301
|
+
"source": [
|
|
1302
|
+
"# Function returns the population variance for the data points in value_expression.\n",
|
|
1303
|
+
"# Syntax:\n",
|
|
1304
|
+
"# var_pop(value_expression)"
|
|
1305
|
+
]
|
|
1306
|
+
},
|
|
1307
|
+
{
|
|
1308
|
+
"cell_type": "code",
|
|
1309
|
+
"execution_count": 51,
|
|
1310
|
+
"metadata": {},
|
|
1311
|
+
"outputs": [
|
|
1312
|
+
{
|
|
1313
|
+
"name": "stdout",
|
|
1314
|
+
"output_type": "stream",
|
|
1315
|
+
"text": [
|
|
1316
|
+
"Equivalent SQL: select var_pop(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1317
|
+
"\n",
|
|
1318
|
+
"\n",
|
|
1319
|
+
" ************************* DataFrame ********************* \n",
|
|
1320
|
+
" assined_col_\n",
|
|
1321
|
+
"0 0.257354\n",
|
|
1322
|
+
"\n",
|
|
1323
|
+
"\n",
|
|
1324
|
+
"\n",
|
|
1325
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1326
|
+
"assined_col_ float\n",
|
|
1327
|
+
"\n",
|
|
1328
|
+
"\n",
|
|
1329
|
+
"\n",
|
|
1330
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
1331
|
+
]
|
|
1332
|
+
}
|
|
1333
|
+
],
|
|
1334
|
+
"source": [
|
|
1335
|
+
"df = admissions_train.assign(True, assined_col_=func.var_pop(admissions_train.gpa.expression))\n",
|
|
1336
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
1337
|
+
]
|
|
1338
|
+
},
|
|
1339
|
+
{
|
|
1340
|
+
"cell_type": "markdown",
|
|
1341
|
+
"metadata": {},
|
|
1342
|
+
"source": [
|
|
1343
|
+
"## var_samp Function"
|
|
1344
|
+
]
|
|
1345
|
+
},
|
|
1346
|
+
{
|
|
1347
|
+
"cell_type": "code",
|
|
1348
|
+
"execution_count": 52,
|
|
1349
|
+
"metadata": {},
|
|
1350
|
+
"outputs": [],
|
|
1351
|
+
"source": [
|
|
1352
|
+
"# Function returns the sample variance for the data points in value_expression.\n",
|
|
1353
|
+
"# Syntax:\n",
|
|
1354
|
+
"# var_samp(value_expression)"
|
|
1355
|
+
]
|
|
1356
|
+
},
|
|
1357
|
+
{
|
|
1358
|
+
"cell_type": "code",
|
|
1359
|
+
"execution_count": 53,
|
|
1360
|
+
"metadata": {},
|
|
1361
|
+
"outputs": [
|
|
1362
|
+
{
|
|
1363
|
+
"name": "stdout",
|
|
1364
|
+
"output_type": "stream",
|
|
1365
|
+
"text": [
|
|
1366
|
+
"Equivalent SQL: select var_samp(gpa) AS assined_col_ from \"admissions_train\"\n",
|
|
1367
|
+
"\n",
|
|
1368
|
+
"\n",
|
|
1369
|
+
" ************************* DataFrame ********************* \n",
|
|
1370
|
+
" assined_col_\n",
|
|
1371
|
+
"0 0.263953\n",
|
|
1372
|
+
"\n",
|
|
1373
|
+
"\n",
|
|
1374
|
+
"\n",
|
|
1375
|
+
" ************************* DataFrame.dtypes ********************* \n",
|
|
1376
|
+
"assined_col_ float\n",
|
|
1377
|
+
"\n",
|
|
1378
|
+
"\n",
|
|
1379
|
+
"\n",
|
|
1380
|
+
" 'assined_col_' Column Type: FLOAT\n"
|
|
1381
|
+
]
|
|
1382
|
+
}
|
|
1383
|
+
],
|
|
1384
|
+
"source": [
|
|
1385
|
+
"df = admissions_train.assign(True, assined_col_=func.var_samp(admissions_train.gpa.expression))\n",
|
|
1386
|
+
"print_variables(df, [\"assined_col_\"])"
|
|
1387
|
+
]
|
|
1388
|
+
},
|
|
1389
|
+
{
|
|
1390
|
+
"cell_type": "code",
|
|
1391
|
+
"execution_count": 54,
|
|
1392
|
+
"metadata": {},
|
|
1393
|
+
"outputs": [
|
|
1394
|
+
{
|
|
1395
|
+
"data": {
|
|
1396
|
+
"text/plain": [
|
|
1397
|
+
"True"
|
|
1398
|
+
]
|
|
1399
|
+
},
|
|
1400
|
+
"execution_count": 54,
|
|
1401
|
+
"metadata": {},
|
|
1402
|
+
"output_type": "execute_result"
|
|
1403
|
+
}
|
|
1404
|
+
],
|
|
1405
|
+
"source": [
|
|
1406
|
+
"# One must run remove_context() to close the connection and garbage collect internally generated objects.\n",
|
|
1407
|
+
"remove_context()"
|
|
1408
|
+
]
|
|
1409
|
+
},
|
|
1410
|
+
{
|
|
1411
|
+
"cell_type": "code",
|
|
1412
|
+
"execution_count": 55,
|
|
1413
|
+
"metadata": {},
|
|
1414
|
+
"outputs": [],
|
|
1415
|
+
"source": [
|
|
1416
|
+
"## Grouping, pivot, unpivot - Not possible to use."
|
|
1417
|
+
]
|
|
1418
|
+
},
|
|
1419
|
+
{
|
|
1420
|
+
"cell_type": "code",
|
|
1421
|
+
"execution_count": null,
|
|
1422
|
+
"metadata": {},
|
|
1423
|
+
"outputs": [],
|
|
1424
|
+
"source": []
|
|
1425
|
+
},
|
|
1426
|
+
{
|
|
1427
|
+
"cell_type": "code",
|
|
1428
|
+
"execution_count": null,
|
|
1429
|
+
"metadata": {},
|
|
1430
|
+
"outputs": [],
|
|
1431
|
+
"source": []
|
|
1432
|
+
}
|
|
1433
|
+
],
|
|
1434
|
+
"metadata": {
|
|
1435
|
+
"kernelspec": {
|
|
1436
|
+
"display_name": "Python 3",
|
|
1437
|
+
"language": "python",
|
|
1438
|
+
"name": "python3"
|
|
1439
|
+
},
|
|
1440
|
+
"language_info": {
|
|
1441
|
+
"codemirror_mode": {
|
|
1442
|
+
"name": "ipython",
|
|
1443
|
+
"version": 3
|
|
1444
|
+
},
|
|
1445
|
+
"file_extension": ".py",
|
|
1446
|
+
"mimetype": "text/x-python",
|
|
1447
|
+
"name": "python",
|
|
1448
|
+
"nbconvert_exporter": "python",
|
|
1449
|
+
"pygments_lexer": "ipython3",
|
|
1450
|
+
"version": "3.7.1"
|
|
1451
|
+
}
|
|
1452
|
+
},
|
|
1453
|
+
"nbformat": 4,
|
|
1454
|
+
"nbformat_minor": 2
|
|
1455
|
+
}
|