teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +238 -1
- teradataml/__init__.py +13 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/Transformations.py +4 -4
- teradataml/analytics/__init__.py +0 -2
- teradataml/analytics/analytic_function_executor.py +3 -0
- teradataml/analytics/json_parser/utils.py +13 -12
- teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
- teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
- teradataml/analytics/sqle/__init__.py +0 -13
- teradataml/analytics/utils.py +1 -0
- teradataml/analytics/valib.py +3 -0
- teradataml/automl/__init__.py +1628 -0
- teradataml/automl/custom_json_utils.py +1270 -0
- teradataml/automl/data_preparation.py +993 -0
- teradataml/automl/data_transformation.py +727 -0
- teradataml/automl/feature_engineering.py +1648 -0
- teradataml/automl/feature_exploration.py +547 -0
- teradataml/automl/model_evaluation.py +163 -0
- teradataml/automl/model_training.py +887 -0
- teradataml/catalog/__init__.py +0 -2
- teradataml/catalog/byom.py +49 -6
- teradataml/catalog/function_argument_mapper.py +0 -2
- teradataml/catalog/model_cataloging_utils.py +2 -1021
- teradataml/common/aed_utils.py +6 -2
- teradataml/common/constants.py +50 -58
- teradataml/common/deprecations.py +160 -0
- teradataml/common/garbagecollector.py +61 -104
- teradataml/common/messagecodes.py +27 -36
- teradataml/common/messages.py +11 -15
- teradataml/common/utils.py +205 -287
- teradataml/common/wrapper_utils.py +1 -110
- teradataml/context/context.py +150 -78
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
- teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
- teradataml/data/fish.csv +160 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/insurance.csv +1 -1
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
- teradataml/data/load_example_data.py +3 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/openml_example.json +63 -0
- teradataml/data/scripts/deploy_script.py +65 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
- teradataml/data/templates/open_source_ml.json +9 -0
- teradataml/data/teradataml_example.json +73 -1
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/dataframe/copy_to.py +79 -13
- teradataml/dataframe/data_transfer.py +8 -0
- teradataml/dataframe/dataframe.py +910 -311
- teradataml/dataframe/dataframe_utils.py +102 -5
- teradataml/dataframe/fastload.py +11 -3
- teradataml/dataframe/setop.py +15 -2
- teradataml/dataframe/sql.py +3735 -77
- teradataml/dataframe/sql_function_parameters.py +56 -5
- teradataml/dataframe/vantage_function_types.py +45 -1
- teradataml/dataframe/window.py +30 -29
- teradataml/dbutils/dbutils.py +18 -1
- teradataml/geospatial/geodataframe.py +18 -7
- teradataml/geospatial/geodataframecolumn.py +5 -0
- teradataml/hyperparameter_tuner/optimizer.py +910 -120
- teradataml/hyperparameter_tuner/utils.py +131 -37
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/sklearn/__init__.py +1 -0
- teradataml/opensource/sklearn/_class.py +255 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
- teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
- teradataml/opensource/sklearn/constants.py +54 -0
- teradataml/options/__init__.py +3 -6
- teradataml/options/configure.py +21 -20
- teradataml/scriptmgmt/UserEnv.py +61 -5
- teradataml/scriptmgmt/lls_utils.py +135 -53
- teradataml/table_operators/Apply.py +38 -6
- teradataml/table_operators/Script.py +45 -308
- teradataml/table_operators/TableOperator.py +182 -591
- teradataml/table_operators/__init__.py +0 -1
- teradataml/table_operators/table_operator_util.py +32 -40
- teradataml/utils/validators.py +127 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
- teradataml/analytics/mle/AdaBoost.py +0 -651
- teradataml/analytics/mle/AdaBoostPredict.py +0 -564
- teradataml/analytics/mle/Antiselect.py +0 -342
- teradataml/analytics/mle/Arima.py +0 -641
- teradataml/analytics/mle/ArimaPredict.py +0 -477
- teradataml/analytics/mle/Attribution.py +0 -1070
- teradataml/analytics/mle/Betweenness.py +0 -658
- teradataml/analytics/mle/Burst.py +0 -711
- teradataml/analytics/mle/CCM.py +0 -600
- teradataml/analytics/mle/CCMPrepare.py +0 -324
- teradataml/analytics/mle/CFilter.py +0 -460
- teradataml/analytics/mle/ChangePointDetection.py +0 -572
- teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
- teradataml/analytics/mle/Closeness.py +0 -737
- teradataml/analytics/mle/ConfusionMatrix.py +0 -420
- teradataml/analytics/mle/Correlation.py +0 -477
- teradataml/analytics/mle/Correlation2.py +0 -573
- teradataml/analytics/mle/CoxHazardRatio.py +0 -679
- teradataml/analytics/mle/CoxPH.py +0 -556
- teradataml/analytics/mle/CoxSurvival.py +0 -478
- teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
- teradataml/analytics/mle/DTW.py +0 -623
- teradataml/analytics/mle/DWT.py +0 -564
- teradataml/analytics/mle/DWT2D.py +0 -599
- teradataml/analytics/mle/DecisionForest.py +0 -716
- teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
- teradataml/analytics/mle/DecisionForestPredict.py +0 -561
- teradataml/analytics/mle/DecisionTree.py +0 -830
- teradataml/analytics/mle/DecisionTreePredict.py +0 -528
- teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
- teradataml/analytics/mle/FMeasure.py +0 -402
- teradataml/analytics/mle/FPGrowth.py +0 -734
- teradataml/analytics/mle/FrequentPaths.py +0 -695
- teradataml/analytics/mle/GLM.py +0 -558
- teradataml/analytics/mle/GLML1L2.py +0 -547
- teradataml/analytics/mle/GLML1L2Predict.py +0 -519
- teradataml/analytics/mle/GLMPredict.py +0 -529
- teradataml/analytics/mle/HMMDecoder.py +0 -945
- teradataml/analytics/mle/HMMEvaluator.py +0 -901
- teradataml/analytics/mle/HMMSupervised.py +0 -521
- teradataml/analytics/mle/HMMUnsupervised.py +0 -572
- teradataml/analytics/mle/Histogram.py +0 -561
- teradataml/analytics/mle/IDWT.py +0 -476
- teradataml/analytics/mle/IDWT2D.py +0 -493
- teradataml/analytics/mle/IdentityMatch.py +0 -763
- teradataml/analytics/mle/Interpolator.py +0 -918
- teradataml/analytics/mle/KMeans.py +0 -485
- teradataml/analytics/mle/KNN.py +0 -627
- teradataml/analytics/mle/KNNRecommender.py +0 -488
- teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
- teradataml/analytics/mle/LAR.py +0 -439
- teradataml/analytics/mle/LARPredict.py +0 -478
- teradataml/analytics/mle/LDA.py +0 -548
- teradataml/analytics/mle/LDAInference.py +0 -492
- teradataml/analytics/mle/LDATopicSummary.py +0 -464
- teradataml/analytics/mle/LevenshteinDistance.py +0 -450
- teradataml/analytics/mle/LinReg.py +0 -433
- teradataml/analytics/mle/LinRegPredict.py +0 -438
- teradataml/analytics/mle/MinHash.py +0 -544
- teradataml/analytics/mle/Modularity.py +0 -587
- teradataml/analytics/mle/NEREvaluator.py +0 -410
- teradataml/analytics/mle/NERExtractor.py +0 -595
- teradataml/analytics/mle/NERTrainer.py +0 -458
- teradataml/analytics/mle/NGrams.py +0 -570
- teradataml/analytics/mle/NPath.py +0 -634
- teradataml/analytics/mle/NTree.py +0 -549
- teradataml/analytics/mle/NaiveBayes.py +0 -462
- teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
- teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
- teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
- teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
- teradataml/analytics/mle/NamedEntityFinder.py +0 -529
- teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
- teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
- teradataml/analytics/mle/POSTagger.py +0 -417
- teradataml/analytics/mle/Pack.py +0 -411
- teradataml/analytics/mle/PageRank.py +0 -535
- teradataml/analytics/mle/PathAnalyzer.py +0 -426
- teradataml/analytics/mle/PathGenerator.py +0 -367
- teradataml/analytics/mle/PathStart.py +0 -464
- teradataml/analytics/mle/PathSummarizer.py +0 -470
- teradataml/analytics/mle/Pivot.py +0 -471
- teradataml/analytics/mle/ROC.py +0 -425
- teradataml/analytics/mle/RandomSample.py +0 -637
- teradataml/analytics/mle/RandomWalkSample.py +0 -490
- teradataml/analytics/mle/SAX.py +0 -779
- teradataml/analytics/mle/SVMDense.py +0 -677
- teradataml/analytics/mle/SVMDensePredict.py +0 -536
- teradataml/analytics/mle/SVMDenseSummary.py +0 -437
- teradataml/analytics/mle/SVMSparse.py +0 -557
- teradataml/analytics/mle/SVMSparsePredict.py +0 -553
- teradataml/analytics/mle/SVMSparseSummary.py +0 -435
- teradataml/analytics/mle/Sampling.py +0 -549
- teradataml/analytics/mle/Scale.py +0 -565
- teradataml/analytics/mle/ScaleByPartition.py +0 -496
- teradataml/analytics/mle/ScaleMap.py +0 -378
- teradataml/analytics/mle/ScaleSummary.py +0 -320
- teradataml/analytics/mle/SentenceExtractor.py +0 -363
- teradataml/analytics/mle/SentimentEvaluator.py +0 -432
- teradataml/analytics/mle/SentimentExtractor.py +0 -578
- teradataml/analytics/mle/SentimentTrainer.py +0 -405
- teradataml/analytics/mle/SeriesSplitter.py +0 -641
- teradataml/analytics/mle/Sessionize.py +0 -475
- teradataml/analytics/mle/SimpleMovAvg.py +0 -397
- teradataml/analytics/mle/StringSimilarity.py +0 -425
- teradataml/analytics/mle/TF.py +0 -389
- teradataml/analytics/mle/TFIDF.py +0 -504
- teradataml/analytics/mle/TextChunker.py +0 -414
- teradataml/analytics/mle/TextClassifier.py +0 -399
- teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
- teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
- teradataml/analytics/mle/TextMorph.py +0 -494
- teradataml/analytics/mle/TextParser.py +0 -623
- teradataml/analytics/mle/TextTagger.py +0 -530
- teradataml/analytics/mle/TextTokenizer.py +0 -502
- teradataml/analytics/mle/UnivariateStatistics.py +0 -488
- teradataml/analytics/mle/Unpack.py +0 -526
- teradataml/analytics/mle/Unpivot.py +0 -438
- teradataml/analytics/mle/VarMax.py +0 -776
- teradataml/analytics/mle/VectorDistance.py +0 -762
- teradataml/analytics/mle/WeightedMovAvg.py +0 -400
- teradataml/analytics/mle/XGBoost.py +0 -842
- teradataml/analytics/mle/XGBoostPredict.py +0 -627
- teradataml/analytics/mle/__init__.py +0 -123
- teradataml/analytics/mle/json/adaboost_mle.json +0 -135
- teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
- teradataml/analytics/mle/json/antiselect_mle.json +0 -34
- teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
- teradataml/analytics/mle/json/arima_mle.json +0 -172
- teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
- teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
- teradataml/analytics/mle/json/betweenness_mle.json +0 -97
- teradataml/analytics/mle/json/burst_mle.json +0 -140
- teradataml/analytics/mle/json/ccm_mle.json +0 -124
- teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
- teradataml/analytics/mle/json/cfilter_mle.json +0 -93
- teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
- teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
- teradataml/analytics/mle/json/closeness_mle.json +0 -104
- teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
- teradataml/analytics/mle/json/correlation_mle.json +0 -86
- teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
- teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
- teradataml/analytics/mle/json/coxph_mle.json +0 -98
- teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
- teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
- teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
- teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
- teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
- teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
- teradataml/analytics/mle/json/dtw_mle.json +0 -97
- teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
- teradataml/analytics/mle/json/dwt_mle.json +0 -101
- teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
- teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
- teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
- teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
- teradataml/analytics/mle/json/glm_mle.json +0 -111
- teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
- teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
- teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/histogram_mle.json +0 -100
- teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
- teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
- teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
- teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
- teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
- teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
- teradataml/analytics/mle/json/idwt_mle.json +0 -66
- teradataml/analytics/mle/json/interpolator_mle.json +0 -151
- teradataml/analytics/mle/json/kmeans_mle.json +0 -97
- teradataml/analytics/mle/json/knn_mle.json +0 -141
- teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
- teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
- teradataml/analytics/mle/json/lar_mle.json +0 -78
- teradataml/analytics/mle/json/larpredict_mle.json +0 -69
- teradataml/analytics/mle/json/lda_mle.json +0 -130
- teradataml/analytics/mle/json/ldainference_mle.json +0 -78
- teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
- teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
- teradataml/analytics/mle/json/linreg_mle.json +0 -42
- teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
- teradataml/analytics/mle/json/minhash_mle.json +0 -113
- teradataml/analytics/mle/json/modularity_mle.json +0 -91
- teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
- teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
- teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
- teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
- teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
- teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
- teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
- teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
- teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
- teradataml/analytics/mle/json/ngrams_mle.json +0 -137
- teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
- teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
- teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
- teradataml/analytics/mle/json/pack_mle.json +0 -58
- teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
- teradataml/analytics/mle/json/pagerank_mle.json +0 -81
- teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
- teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
- teradataml/analytics/mle/json/pathstart_mle.json +0 -62
- teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
- teradataml/analytics/mle/json/pivoting_mle.json +0 -71
- teradataml/analytics/mle/json/postagger_mle.json +0 -51
- teradataml/analytics/mle/json/randomsample_mle.json +0 -131
- teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
- teradataml/analytics/mle/json/roc_mle.json +0 -73
- teradataml/analytics/mle/json/sampling_mle.json +0 -75
- teradataml/analytics/mle/json/sax_mle.json +0 -154
- teradataml/analytics/mle/json/scale_mle.json +0 -93
- teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
- teradataml/analytics/mle/json/scalemap_mle.json +0 -44
- teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
- teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
- teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
- teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
- teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
- teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
- teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
- teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
- teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
- teradataml/analytics/mle/json/svmdense_mle.json +0 -165
- teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
- teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
- teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
- teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
- teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
- teradataml/analytics/mle/json/textchunker_mle.json +0 -40
- teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
- teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
- teradataml/analytics/mle/json/textmorph_mle.json +0 -63
- teradataml/analytics/mle/json/textparser_mle.json +0 -166
- teradataml/analytics/mle/json/texttagger_mle.json +0 -81
- teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
- teradataml/analytics/mle/json/tf_mle.json +0 -33
- teradataml/analytics/mle/json/tfidf_mle.json +0 -34
- teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
- teradataml/analytics/mle/json/unpack_mle.json +0 -91
- teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
- teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
- teradataml/analytics/mle/json/varmax_mle.json +0 -176
- teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
- teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
- teradataml/analytics/mle/json/xgboost_mle.json +0 -178
- teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
- teradataml/analytics/sqle/Antiselect.py +0 -321
- teradataml/analytics/sqle/Attribution.py +0 -603
- teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
- teradataml/analytics/sqle/GLMPredict.py +0 -430
- teradataml/analytics/sqle/MovingAverage.py +0 -543
- teradataml/analytics/sqle/NGramSplitter.py +0 -548
- teradataml/analytics/sqle/NPath.py +0 -632
- teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
- teradataml/analytics/sqle/Pack.py +0 -388
- teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
- teradataml/analytics/sqle/Sessionize.py +0 -390
- teradataml/analytics/sqle/StringSimilarity.py +0 -400
- teradataml/analytics/sqle/Unpack.py +0 -503
- teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
- teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
- teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
- teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
- teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
- teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
- teradataml/analytics/sqle/json/npath_sqle.json +0 -67
- teradataml/analytics/sqle/json/pack_sqle.json +0 -47
- teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
- teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
- teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
- teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
- teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
- teradataml/catalog/model_cataloging.py +0 -980
- teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
- teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
- teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
- teradataml/table_operators/sandbox_container_util.py +0 -643
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
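Two of the additions above stand out: the new `teradataml/opensource/sklearn/` package (teradataml's bridge to scikit-learn) and the new `teradataml/automl/` package, alongside the removal of the entire deprecated `teradataml/analytics/mle/` tree. A minimal usage sketch of the sklearn bridge follows; the `td_sklearn` entry point and the flat estimator accessor are assumptions taken from the 20.0.0.0 release documentation, not from anything shown in this diff, so treat the snippet as illustrative only.

```python
# Illustrative only: entry-point and accessor names are assumed, and the
# column names are hypothetical placeholders.
from teradataml import create_context, DataFrame, td_sklearn as osml

create_context(host="<host>", username="<user>", password="<password>")
train = DataFrame("train_regression")  # sample data added in this release

# A scikit-learn estimator driven through Vantage table operators.
model = osml.LinearRegression()
model.fit(train.select(["X1", "X2"]), train.select(["Y"]))
```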
--- a/teradataml/dataframe/dataframe.py
+++ b/teradataml/dataframe/dataframe.py
@@ -29,12 +29,12 @@ from teradataml.dataframe.sql_interfaces import ColumnExpression
 from teradataml.dataframe.sql_functions import case
 from teradataml.series.series import Series
 from teradatasqlalchemy.types import _TDType, BIGINT, INTEGER, PERIOD_TIMESTAMP, SMALLINT, BYTEINT, FLOAT, DECIMAL
-from teradataml.common.utils import UtilFuncs
+from teradataml.common.utils import UtilFuncs
 from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.messages import Messages
 from teradataml.common.messagecodes import MessageCodes
 from teradataml.common.constants import AEDConstants
-from teradataml.common.constants import SourceType, PythonTypes, TeradataConstants
+from teradataml.common.constants import SourceType, PythonTypes, TeradataConstants, \
     TeradataTypes, PTITableConstants, TableOperatorConstants, SQLFunctionConstants
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils, DataFrameUtils
 from teradataml.dataframe.indexer import _LocationIndexer
@@ -52,9 +52,10 @@ from teradatasql import OperationalError
 from teradataml.dataframe.window import Window
 from teradataml.dataframe.data_transfer import _DataTransferUtils
 from teradataml.common.bulk_exposed_utils import _validate_unimplemented_function
+from teradatasqlalchemy.telemetry.queryband import collect_queryband
 
-#TODO use logger when available on master branch
-#logger = teradatapylog.getLogger()
+# TODO use logger when available on master branch
+# logger = teradatapylog.getLogger()
 in_schema = UtilFuncs._in_schema
 
 
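The new `collect_queryband` import above drives most of the hunks that follow: public DataFrame methods gain a `@collect_queryband(queryband="DF_...")` decorator so each API call is tagged with a query band for telemetry. The actual decorator ships in teradatasqlalchemy; the sketch below only illustrates the general shape of such a decorator factory and is not the shipped implementation.

```python
import functools

_QUERYBANDS = []  # stand-in sink; the real decorator feeds the session QUERY_BAND


def collect_queryband(queryband=None):
    """Sketch of a query-band-collecting decorator factory (illustrative only)."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            _QUERYBANDS.append(queryband)  # e.g. "DF_fromTable" for DataFrame.from_table
            return func(*args, **kwargs)
        return wrapper
    return decorator
```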
@@ -215,7 +216,7 @@ class DataFrame():
                 raise ValueError(Messages.get_message(
                     MessageCodes.FROM_QUERY_SELECT_SUPPORTED).format("Check the syntax."))
             raise ValueError(Messages.get_message(
-                MessageCodes.FROM_QUERY_SELECT_SUPPORTED))
+                MessageCodes.FROM_QUERY_SELECT_SUPPORTED))
 
             self._nodeid = self._aed_utils._aed_query(self._query, temp_table_name)
         else:
@@ -229,6 +230,8 @@ class DataFrame():
 
             self._loc = _LocationIndexer(self)
             self._iloc = _LocationIndexer(self, integer_indexing=True)
+            self.__data = None
+            self.__data_columns = None
 
         except TeradataMlException:
             raise
@@ -239,6 +242,7 @@ class DataFrame():
                 MessageCodes.TDMLDF_CREATE_FAIL) from err
 
     @classmethod
+    @collect_queryband(queryband="DF_fromTable")
     def from_table(cls, table_name, index=True, index_label=None):
         """
         Class method for creating a DataFrame from a table or a view.
@@ -295,6 +299,7 @@ class DataFrame():
         return cls(table_name, index, index_label)
 
     @classmethod
+    @collect_queryband(queryband="DF_fromQuery")
     def from_query(cls, query, index=True, index_label=None, materialize=False):
         """
         Class method for creating a DataFrame from a query.
@@ -407,12 +412,170 @@ class DataFrame():
         if undropped_index is not None and all(elem in [col.name for col in metaexpr.c] for elem in undropped_index):
             df._undropped_index = undropped_index
         elif undropped_index is not None and all(UtilFuncs._teradata_quote_arg(elem, "\"", False)
-                                                 in [col.name for col in metaexpr.c] for elem in undropped_index):
-            df._undropped_index = undropped_index
+                                                 in [col.name for col in metaexpr.c] for elem in undropped_index):
+            df._undropped_index = undropped_index
 
         return df
 
-    def __execute_node_and_set_table_name(self, nodeid, metaexpr=None):
+    @collect_queryband(queryband="DF_fillna")
+    def fillna(self, value=None, columns=None, literal_value=False):
+        """
+        Method to replace the null values in a column with the value specified.
+
+        PARAMETERS:
+            value:
+                Required Argument.
+                Specifies the value(s) to replace the null values with. If value is a dict
+                then "columns" is ignored.
+                Note:
+                    * To use pre-defined strings to replace the null value set "literal_value" to True.
+                Permitted Values:
+                    * Pre-defined strings:
+                        * 'MEAN' - Replace null value with the average of the values in the column.
+                        * 'MODE' - Replace null value with the mode of the values in the column.
+                        * 'MEDIAN' - Replace null value with the median of the values in the column.
+                        * 'MIN' - Replace null value with the minimum of the values in the column.
+                        * 'MAX' - Replace null value with the maximum of the values in the column.
+                Types: int, float, str, dict containing column names and value, list
+
+            columns:
+                Optional Argument.
+                Specifies the column names to perform the null value replacement. If "columns"
+                is None, then all the columns having null value and data type similar to
+                the data type of the value specified are considered.
+                Default Value: None
+                Types: str, tuple or list of str
+
+            literal_value:
+                Optional Argument.
+                Specifies whether the pre-defined strings passed to "value" should be treated
+                as literal or not.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+            >>> df
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+
+            # Example 1: Populate null value in column 'Jan' and 'Mar'
+            #            with the value specified as dictionary.
+            >>> df.fillna({"Jan": 123, "Mar":234})
+                 accounts    Feb  Jan  Mar    Apr  datetime
+            0    Blue Inc   90.0   50   95  101.0  17/01/04
+            1    Alpha Co  210.0  200  215  250.0  17/01/04
+            2   Jones LLC  200.0  150  140  180.0  17/01/04
+            3  Yellow Inc   90.0  123  234    NaN  17/01/04
+            4  Orange Inc  210.0  123  234  250.0  17/01/04
+            5     Red Inc  200.0  150  140    NaN  17/01/04
+
+            # Example 2: Populate the null value in 'Jan' column
+            #            with minimum value in that column.
+            >>> df.fillna("Min", "Jan")
+                 accounts    Feb  Jan    Mar    Apr  datetime
+            0  Yellow Inc   90.0   50    NaN    NaN  17/01/04
+            1   Jones LLC  200.0  150  140.0  180.0  17/01/04
+            2     Red Inc  200.0  150  140.0    NaN  17/01/04
+            3    Blue Inc   90.0   50   95.0  101.0  17/01/04
+            4    Alpha Co  210.0  200  215.0  250.0  17/01/04
+            5  Orange Inc  210.0   50    NaN  250.0  17/01/04
+        """
+        from teradataml import SimpleImputeFit, SimpleImputeTransform
+
+        arg_info_matrix = []
+        arg_info_matrix.append(["value", value, True, (int, float, str, dict, list)])
+        arg_info_matrix.append(["columns", columns, True, (list, str, tuple)])
+        arg_info_matrix.append(["literal_value", literal_value, True, (bool)])
+
+        # Validate argument types
+        _Validators._validate_function_arguments(arg_info_matrix)
+
+        if isinstance(columns, tuple):
+            columns = list(columns)
+
+        # If dict is passed separate the values of 'columns' and 'value'
+        if isinstance(value, dict):
+            columns, value = zip(*value.items())
+            columns = [str(col) for col in columns]
+            value = [str(val) for val in value]
+
+        is_stats = False
+
+        for val in UtilFuncs._as_list(value):
+            if isinstance(val, str) and val.lower() in ["mean", "median", "mode", "min", "max"]:
+                is_stats = True
+                break
+
+        # If "literal_value" is set to False
+        if not literal_value and is_stats:
+            stats = []
+            stats_columns = []
+            literals = []
+            literals_columns = []
+            # If value is a list, extract columns and values, if values match to any
+            # predefined string then assign it to stats and column name to stats_column
+            # else treat it as a literal value and literal column.
+            if isinstance(value, list):
+                for val, col in zip(value, columns):
+                    if isinstance(val, str) and val.lower() in ["mean", "median", "mode", "min", "max"]:
+                        stats.append(val)
+                        stats_columns.append(col)
+                    else:
+                        literals.append(str(val))
+                        literals_columns.append(col)
+            else:
+                # In case it is not a list then simply assign it to 'stats' and 'stats_columns'
+                stats = value
+                stats_columns = columns
+
+            # In case no literal value found in the list and literal list is empty assign it as 'None'
+            # instead of empty list.
+            literals = None if not literals else literals
+            literals_columns = None if not literals_columns else literals_columns
+
+        else:
+            # If it is a literal value then 'stats' and 'stats_column' is not required
+            stats = None
+            stats_columns = None
+
+            # In case column is not specified by the user, then all the columns in that dataframe
+            # should be considered else the specified columns should be considered for 'literal_columns'
+            literals_columns = self.columns if (columns is None and value is not None) else columns
+            literals_columns = UtilFuncs._as_list(literals_columns)
+            # In case value is a list of single element, then multiply it as many times as
+            # number of columns ['12'] -> ['12','12', upto number of columns]
+            # else convert it to str and append it
+            if isinstance(value, list):
+                literals = []
+                for val in value:
+                    literals.append(str(val))
+            else:
+                literals = UtilFuncs._as_list(str(value))
+            literals = literals * len(literals_columns) if len(literals) != len(literals_columns) else literals
+
+        fit_obj = SimpleImputeFit(data=self,
+                                  literals=literals,
+                                  literals_columns=literals_columns,
+                                  stats=stats,
+                                  stats_columns=stats_columns)
+
+        return fit_obj.transform(data=self).result
+
+    def __execute_node_and_set_table_name(self, nodeid, metaexpr=None):
         """
         Private method for executing node and setting _table_name,
         if not set already.
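As the body above shows, the new `fillna` is a thin wrapper around the SQLE SimpleImpute pair: it sorts the requested replacements into literal and statistic buckets, fits a `SimpleImputeFit` object, and returns the transformed result. The equivalent explicit call for Example 2 of the docstring would look roughly like this (assuming the `sales` example data is loaded, as in the docstring):

```python
from teradataml import DataFrame, SimpleImputeFit, load_example_data

load_example_data("dataframe", "sales")
df = DataFrame("sales")

# Equivalent of df.fillna("Min", "Jan"): impute nulls in "Jan" with the column minimum.
fit_obj = SimpleImputeFit(data=df,
                          literals=None, literals_columns=None,
                          stats="Min", stats_columns="Jan")
filled = fit_obj.transform(data=df).result
```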
@@ -632,14 +795,14 @@ class DataFrame():
         # get the ColumnExpression from the _MetaExpression
         if isinstance(key, str):
             return self.__getattr__(key)
-
+
         if isinstance(key, list):
             return self.select(key)
 
         if isinstance(key, ClauseElement):
             from teradataml.dataframe.sql import _SQLColumnExpression
             key = _SQLColumnExpression(key)
-
+
         # apply the filter expression
         if isinstance(key, ColumnExpression):
 
@@ -662,7 +825,7 @@ class DataFrame():
             raise
 
         except ValueError:
-            raise
+            raise
 
         except Exception as err:
             errcode = MessageCodes.TDMLDF_INFO_ERROR
@@ -1073,6 +1236,7 @@ class DataFrame():
         td_metadata = [(column.name, repr(column.type)) for column in self._metaexpr.c]
         return MetaData(td_metadata)
 
+    @collect_queryband(queryband="DF_info")
     def info(self, verbose=True, buf=None, max_cols=None, null_counts=False):
         """
         DESCRIPTION:
@@ -1193,8 +1357,10 @@ class DataFrame():
         except TeradataMlException:
             raise
         except Exception as err:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
 
+    @collect_queryband(queryband="DF_head")
     def head(self, n=display.max_rows):
         """
         DESCRIPTION:
@@ -1291,8 +1457,10 @@ class DataFrame():
         except TeradataMlException:
             raise
         except Exception as err:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
 
+    @collect_queryband(queryband="DF_tail")
     def tail(self, n=display.max_rows):
         """
         DESCRIPTION:
@@ -1379,14 +1547,16 @@ class DataFrame():
 
         try:
             if self._metaexpr is None:
-                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                          MessageCodes.TDMLDF_INFO_ERROR)
 
             sort_col = self._get_sort_col()
             return df_utils._get_sorted_nrow(self, n, sort_col[0], asc=False)
         except TeradataMlException:
             raise
         except Exception as err:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
 
     def _get_axis(self, axis):
         """
@@ -1414,14 +1584,17 @@ class DataFrame():
             elif axis == "columns":
                 return 1
             else:
-                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+                                          MessageCodes.TDMLDF_INVALID_DROP_AXIS)
         elif isinstance(axis, numbers.Integral):
             if axis in [0, 1]:
                 return axis
             else:
-                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+                                          MessageCodes.TDMLDF_INVALID_DROP_AXIS)
         else:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_DROP_AXIS),
+                                      MessageCodes.TDMLDF_INVALID_DROP_AXIS)
 
     def _get_sort_col(self):
         """
@@ -1446,7 +1619,7 @@ class DataFrame():
                 col_name = self._index_label[0]
             else:
                 col_name = self._index_label
-        else:
+        else:  # Use the first column from metadata
             col_name = self.columns[0]
 
         col_type = PythonTypes.PY_NULL_TYPE.value
@@ -1455,16 +1628,20 @@ class DataFrame():
                 col_type = py_type
 
         if col_type == PythonTypes.PY_NULL_TYPE.value:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                      MessageCodes.TDMLDF_INFO_ERROR)
 
         sort_col_sqlalchemy_type = (self._metaexpr.t.c[col_name].type)
         # convert types to string from sqlalchemy type for the columns entered for sort
        sort_col_type = repr(sort_col_sqlalchemy_type).split("(")[0]
         if sort_col_type in unsupported_types:
-            raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, sort_col_type,
+            raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, sort_col_type,
+                                                           "ANY, except following {}".format(unsupported_types)),
+                                      MessageCodes.UNSUPPORTED_DATATYPE)
 
         return (col_name, col_type)
 
+    @collect_queryband(queryband="DF_drop")
     def drop(self, labels=None, axis=0, columns=None):
         """
         DESCRIPTION:
@@ -1585,7 +1762,7 @@ class DataFrame():
                     index_labels = labels
                 else:
                     column_labels = labels
-            else:
+            else:  # Columns is not None
                 column_labels = columns
 
             if index_labels is not None:
@@ -1594,7 +1771,8 @@ class DataFrame():
 
                 if isinstance(index_labels, list):
                     if len(index_labels) == 0:
-                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                                                  MessageCodes.TDMLDF_DROP_ARGS)
 
                 if sort_col[1] == PythonTypes.PY_STRING_TYPE.value:
                     index_labels = ["'{}'".format(x) for x in index_labels]
@@ -1606,25 +1784,29 @@ class DataFrame():
                     index_expr = index_labels
 
                 filter_expr = "{0} not in ({1})".format(sort_col[0], index_expr)
-                new_nodeid= self._aed_utils._aed_filter(self._nodeid, filter_expr)
+                new_nodeid = self._aed_utils._aed_filter(self._nodeid, filter_expr)
                 # Get the updated metaexpr
                 new_metaexpr = UtilFuncs._get_metaexpr_using_parent_metaexpr(new_nodeid, self._metaexpr)
                 return self._create_dataframe_from_node(new_nodeid, new_metaexpr, self._index_label)
-            else:
+            else:  # Column labels
                 select_cols = []
                 cols = [x.name for x in self._metaexpr.columns]
                 if isinstance(column_labels, list):
                     if len(column_labels) == 0:
-                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                                                  MessageCodes.TDMLDF_DROP_ARGS)
 
                     if not all(isinstance(n, str) for n in column_labels):
-                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES),
+                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES),
+                                                  MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES)
                     drop_cols = [x for x in column_labels]
                 elif isinstance(column_labels, (tuple, dict)):
-                    raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                    raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ARGS),
+                                              MessageCodes.TDMLDF_DROP_ARGS)
                 else:
                     if not isinstance(column_labels, str):
-                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES),
+                        raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES),
+                                                  MessageCodes.TDMLDF_DROP_INVALID_COL_NAMES)
                     drop_cols = [column_labels]
 
                 for drop_name in drop_cols:
@@ -1637,14 +1819,17 @@ class DataFrame():
                         select_cols.append(colname)
                 if len(select_cols) > 0:
                     return self.select(select_cols)
-                else:
-                    raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ALL_COLS),
+                else:  # no columns selected
+                    raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_DROP_ALL_COLS),
+                                              MessageCodes.TDMLDF_DROP_ALL_COLS)
 
         except TeradataMlException:
             raise
         except Exception as err:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
 
+    @collect_queryband(queryband="DF_dropna")
     def dropna(self, how='any', thresh=None, subset=None):
         """
         DESCRIPTION:
@@ -1755,10 +1940,10 @@ class DataFrame():
                 filter_expr = "{0} >= {1}".format(fmt_filter, thresh)
             elif how == 'any':
                 filter_expr = "{0} = {1}".format(fmt_filter, len(col_filters))
-            else:
+            else:  # how == 'all'
                 filter_expr = "{0} > 0".format(fmt_filter)
 
-            new_nodeid= self._aed_utils._aed_filter(self._nodeid, filter_expr)
+            new_nodeid = self._aed_utils._aed_filter(self._nodeid, filter_expr)
 
             # Get the updated metaexpr
             new_metaexpr = UtilFuncs._get_metaexpr_using_parent_metaexpr(new_nodeid, self._metaexpr)
@@ -1766,8 +1951,10 @@ class DataFrame():
         except TeradataMlException:
             raise
         except Exception as err:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                      MessageCodes.TDMLDF_INFO_ERROR) from err
 
+    @collect_queryband(queryband="DF_sort")
     def sort(self, columns, ascending=True):
         """
         DESCRIPTION:
@@ -1903,16 +2090,16 @@ class DataFrame():
         _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
 
         try:
-            orderexpr=""
-            type_expr=[]
+            orderexpr = ""
+            type_expr = []
             invalid_types = []
             invalid_columns = []
             unsupported_types = ['BLOB', 'CLOB', 'ARRAY', 'VARRAY']
 
             if (isinstance(columns, str)):
-                columns=[columns]
+                columns = [columns]
             if isinstance(ascending, bool):
-                ascending=[ascending] * len(columns)
+                ascending = [ascending] * len(columns)
 
             # Validating lengths of passed arguments which are passed i.e. length of columns
             # must be same as ascending
@@ -1935,9 +2122,10 @@ class DataFrame():
                     invalid_columns.append(column_name)
                     invalid_types.append(col_type)
             if len(invalid_types) > 0:
-                raise TeradataMlException(
-                    Messages.get_message(MessageCodes.INVALID_COLUMN_DATATYPE, ", ".join(invalid_columns), 'columns',
-                                         "Invalid", ", ".join(unsupported_types)),
+                raise TeradataMlException(
+                    Messages.get_message(MessageCodes.INVALID_COLUMN_DATATYPE, ", ".join(invalid_columns), 'columns',
+                                         "Invalid", ", ".join(unsupported_types)),
+                    MessageCodes.UNSUPPORTED_DATATYPE)
 
             orderexpr = []
             get_column_order = lambda asc: 'ASC' if asc else 'DESC'
@@ -1974,7 +2162,8 @@ class DataFrame():
         except TeradataMlException:
             raise
 
-    def filter(self, items=None, like=None, regex=None, axis=1, **kw):
+    @collect_queryband(queryband="DF_filter")
+    def filter(self, items=None, like=None, regex=None, axis=1, **kw):
         """
         DESCRIPTION:
             Filter rows or columns of dataframe according to labels in the specified index.
@@ -2239,12 +2428,12 @@ class DataFrame():
                 op += 'regex'
                 valid_value = type(regex) is str
 
-            if op not in('items', 'like', 'regex'):
+            if op not in ('items', 'like', 'regex'):
                 raise ValueError('Must use exactly one of the parameters items, like, and regex.')
 
             if not valid_value:
-                msg = 'The "items" parameter must be list of strings or tuples of column labels/index values. '
-                      'The "regex" parameter and "like" parameter must be strings.'
+                msg = 'The "items" parameter must be list of strings or tuples of column labels/index values. ' + \
+                      'The "regex" parameter and "like" parameter must be strings.'
                 raise TeradataMlException(msg, errcode)
 
             # validate multi index labels for items
@@ -2283,7 +2472,9 @@ class DataFrame():
                 msg = Messages.get_message(errcode)
                 raise TeradataMlException(msg, errcode)
 
-    def describe(self, percentiles=[.25, .5, .75], include=None, verbose=False, distinct=False, statistics=None):
+    @collect_queryband(queryband="DF_describe")
+    def describe(self, percentiles=[.25, .5, .75], include=None, verbose=False, distinct=False, statistics=None,
+                 columns=None):
         """
         DESCRIPTION:
             Generates statistics for numeric columns. This function can be used in two modes:
@@ -2355,6 +2546,13 @@ class DataFrame():
                 Default Values: None
                 Types: str or List of str
 
+            columns:
+                Optional Argument.
+                Specifies the name(s) of the columns we are collecting statistics for.
+                Default Values: None
+                Types: str or List of str
+
+
         RETURNS:
             teradataml DataFrame
 
@@ -2629,19 +2827,27 @@ class DataFrame():
 
         # Argument validations
         awu_matrix = []
+        awu_matrix.append(["columns", columns, True, (str, list), True])
         awu_matrix.append(["percentiles", percentiles, True, (float, list)])
         awu_matrix.append(["include", include, True, (str), True, [None, "all"]])
         awu_matrix.append(["verbose", verbose, True, (bool)])
         awu_matrix.append(["distinct", distinct, True, (bool)])
-        awu_matrix.append(["statistics", statistics, True, (str, list), True,
+        awu_matrix.append(["statistics", statistics, True, (str, list), True,
+                           ["count", "mean", "min", "max", "unique", "std", "describe", "percentile"]])
 
         # Validate argument types
         _Validators._validate_function_arguments(awu_matrix)
 
+        # Checking each element in passed columns to be valid column in dataframe
+        _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
+
         # Validate argument values.
         if isinstance(percentiles, float):
             percentiles = [percentiles]
 
+        if columns:
+            columns = UtilFuncs._as_list(columns)
+
         # Converting the statistics list to lower case
         if statistics:
             statistics = [stats.lower() for stats in UtilFuncs._as_list(statistics)]
@@ -2655,7 +2861,7 @@ class DataFrame():
         # Percentiles must be a list of values between 0 and 1.
         if not isinstance(percentiles, list) or not all(p > 0 and p < 1 for p in percentiles):
             raise ValueError(Messages.get_message(MessageCodes.INVALID_ARG_VALUE, percentiles, "percentiles",
-                                                  "percentiles must be a list of values between 0 and 1"))
+                                                  "percentiles must be a list of values between 0 and 1"))
 
         # Argument 'include' with value 'all' is not allowed for DataFrameGroupByTime
         if include is not None and include.lower() == "all" and isinstance(self, DataFrameGroupByTime):
@@ -2679,11 +2885,16 @@ class DataFrame():
         groupby_column_list = None
         if isinstance(self, DataFrameGroupBy):
             groupby_column_list = self.groupby_column_list
+            df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
+                                              groupby_column_list=groupby_column_list)
 
         if isinstance(self, DataFrameGroupByTime):
             groupby_column_list = self.groupby_column_list
+            df_utils._invalid_describe_column(df=self, columns=columns, metaexpr=self._metaexpr,
+                                              groupby_column_list=groupby_column_list)
+
             # Construct the aggregate query.
-            agg_query = df_utils._construct_describe_query(df=self, metaexpr=self._metaexpr,
+            agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
                                                            percentiles=percentiles, function_label=function_label,
                                                            groupby_column_list=groupby_column_list, include=include,
                                                            is_time_series_aggregate=True, verbose=verbose,
@@ -2695,7 +2906,7 @@ class DataFrame():
                                                            fill=self._fill)
         else:
             # Construct the aggregate query.
-            agg_query = df_utils._construct_describe_query(df=self, metaexpr=self._metaexpr,
+            agg_query = df_utils._construct_describe_query(df=self, columns=columns, metaexpr=self._metaexpr,
                                                            percentiles=percentiles, function_label=function_label,
                                                            groupby_column_list=groupby_column_list, include=include,
                                                            is_time_series_aggregate=False, verbose=verbose,
@@ -2710,7 +2921,7 @@ class DataFrame():
                 describe_df = df2
             else:
                 describe_df = DataFrame.from_query(agg_query, index_label=function_label)
-
+
             # Check if numeric overflow can occur for result DataFrame.
             if self._check_numeric_overflow(describe_df):
                 result_df = self._promote_dataframe_types()
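The describe changes above add a `columns` argument: the listed columns are validated against the DataFrame (and against any group-by columns) before the aggregate query is built, so statistics can be restricted to a subset of columns. For example, with the `sales` data used elsewhere in this diff:

```python
from teradataml import DataFrame

df = DataFrame("sales")
# New in 20.0.0.0: compute describe() statistics for selected columns only.
stats = df.describe(columns=["Feb", "Apr"], percentiles=[.25, .5, .75])
```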
@@ -2719,8 +2930,10 @@ class DataFrame():
|
|
|
2719
2930
|
except TeradataMlException:
|
|
2720
2931
|
raise
|
|
2721
2932
|
except Exception as err:
|
|
2722
|
-
raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
|
|
2933
|
+
raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
|
|
2934
|
+
MessageCodes.TDMLDF_INFO_ERROR) from err
|
|
2723
2935
|
|
|
2936
|
+
@collect_queryband(queryband="DF_kurtosis")
|
|
2724
2937
|
def kurtosis(self, distinct=False):
|
|
2725
2938
|
"""
|
|
2726
2939
|
DESCRIPTION:
|
|
@@ -2863,6 +3076,7 @@ class DataFrame():
 
         return self._get_dataframe_aggregate(operation='kurtosis', distinct=distinct)
 
+    @collect_queryband(queryband="DF_min")
     def min(self, distinct=False):
         """
         DESCRIPTION:
@@ -2993,6 +3207,7 @@ class DataFrame():
 
         return self._get_dataframe_aggregate(operation='min', distinct=distinct)
 
+    @collect_queryband(queryband="DF_max")
     def max(self, distinct=False):
         """
         DESCRIPTION:
@@ -3124,6 +3339,7 @@ class DataFrame():
 
         return self._get_dataframe_aggregate(operation='max', distinct=distinct)
 
+    @collect_queryband(queryband="DF_mean")
     def mean(self, distinct=False):
         """
         DESCRIPTION:
@@ -3247,8 +3463,9 @@ class DataFrame():
         # Validate argument types
         _Validators._validate_function_arguments(awu_matrix)
 
-        return self._get_dataframe_aggregate(operation='mean', distinct=distinct)
+        return self._get_dataframe_aggregate(operation='mean', distinct=distinct)
 
+    @collect_queryband(queryband="DF_skew")
     def skew(self, distinct=False):
         """
         DESCRIPTION:
@@ -3388,6 +3605,7 @@ class DataFrame():
 
         return self._get_dataframe_aggregate(operation='skew', distinct=distinct)
 
+    @collect_queryband(queryband="DF_sum")
     def sum(self, distinct=False):
         """
         DESCRIPTION:
@@ -3513,6 +3731,7 @@ class DataFrame():
 
         return self._get_dataframe_aggregate(operation='sum', distinct=distinct)
 
+    @collect_queryband(queryband="DF_count")
     def count(self, distinct=False):
         """
         DESCRIPTION:
@@ -3635,6 +3854,7 @@ class DataFrame():
         _Validators._validate_function_arguments(awu_matrix)
         return self._get_dataframe_aggregate(operation='count', distinct=distinct)
 
+    @collect_queryband(queryband="DF_csum")
     def csum(self, sort_columns, drop_columns=False):
         """
         DESCRIPTION:
@@ -3738,6 +3958,7 @@ class DataFrame():
         self._validate_window_aggregates_arguments(sort_columns)
         return self._get_dataframe_aggregate(operation='csum', sort_columns=sort_columns, drop_columns=drop_columns)
 
+    @collect_queryband(queryband="DF_msum")
     def msum(self, width, sort_columns, drop_columns=False):
         """
         DESCRIPTION:
@@ -3850,6 +4071,7 @@ class DataFrame():
         return self._get_dataframe_aggregate(
             operation='msum', width=width, sort_columns=sort_columns, drop_columns=drop_columns)
 
+    @collect_queryband(queryband="DF_mavg")
     def mavg(self, width, sort_columns, drop_columns=False):
         """
         DESCRIPTION:
@@ -3963,6 +4185,7 @@ class DataFrame():
         return self._get_dataframe_aggregate(
             operation='mavg', width=width, sort_columns=sort_columns, drop_columns=drop_columns)
 
+    @collect_queryband(queryband="DF_mdiff")
     def mdiff(self, width, sort_columns, drop_columns=False):
         """
         DESCRIPTION:
@@ -4076,6 +4299,7 @@ class DataFrame():
         return self._get_dataframe_aggregate(
             operation='mdiff', width=width, sort_columns=sort_columns, drop_columns=drop_columns)
 
+    @collect_queryband(queryband="DF_mlinreg")
     def mlinreg(self, width, sort_column, drop_columns=False):
         """
         DESCRIPTION:
@@ -4266,6 +4490,7 @@ class DataFrame():
         _Validators._validate_unexpected_column_type(
             self, sort_columns, sort_columns_arg_name, _Dtypes._get_sort_unsupported_data_types())
 
+    @collect_queryband(queryband="DF_std")
     def std(self, distinct=False, population=False):
         """
         DESCRIPTION:
@@ -4441,6 +4666,7 @@ class DataFrame():
         _Validators._validate_function_arguments(awu_matrix)
         return self._get_dataframe_aggregate(operation='std', distinct=distinct, population=population)
 
+    @collect_queryband(queryband="DF_median")
     def median(self, distinct=False):
         """
         DESCRIPTION:
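The dominant change across the aggregate methods above (min, max, mean, skew, sum, count, csum, msum, mavg, mdiff, mlinreg, std, median) is the new @collect_queryband decorator, which tags each operation before it runs. The decorator's real implementation lives elsewhere in teradataml and is not part of this diff; the following is a minimal hypothetical sketch of the pattern, where the collected label is simply printed instead of being set as a Teradata query band.

    import functools

    def collect_queryband(queryband=None, arg_name=None, prefix=None):
        # Hypothetical sketch only: derive a label either from the static
        # "queryband" value or, as on _get_dataframe_aggregate below, from a
        # keyword argument named by "arg_name", prefixed with "prefix".
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                label = queryband
                if label is None and arg_name is not None:
                    label = "{}_{}".format(prefix, kwargs.get(arg_name))
                print("queryband:", label)  # stand-in for the real collection step
                return func(*args, **kwargs)
            return wrapper
        return decorator

    @collect_queryband(queryband="DF_min")
    def min_of(values, distinct=False):
        return min(set(values) if distinct else values)

    print(min_of([3, 1, 2]))  # prints "queryband: DF_min", then 1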
@@ -4566,12 +4792,14 @@ class DataFrame():
         _Validators._validate_function_arguments(awu_matrix)
 
         if distinct and not isinstance(self, DataFrameGroupByTime):
-            raise ValueError(
-                Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format('distinct', 'Aggregation',
-                                                                                     'True', 'median()', 'DataFrameGroupByTime'))
+            raise ValueError(
+                Messages.get_message(MessageCodes.ARG_VALUE_CLASS_DEPENDENCY).format('distinct', 'Aggregation',
+                                                                                     'True', 'median()',
+                                                                                     'DataFrameGroupByTime'))
 
-        return self._get_dataframe_aggregate(operation='median', distinct=distinct)
+        return self._get_dataframe_aggregate(operation='median', distinct=distinct)
 
+    @collect_queryband(queryband="DF_var")
     def var(self, distinct=False, population=False):
         """
         DESCRIPTION:
@@ -4773,7 +5001,8 @@ class DataFrame():
 
         return self._get_dataframe_aggregate(operation='var', distinct=distinct, population=population)
 
-    def agg(self, func=None):
+    @collect_queryband(queryband="DF_agg")
+    def agg(self, func=None):
         """
         DESCRIPTION:
             Perform aggregates using one or more operations.
@@ -4918,10 +5147,12 @@ class DataFrame():
 
         if not isinstance(func, str) and not isinstance(func, list) and not isinstance(func, dict):
             raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE,
-                                                           'func', ['str', 'list', 'dict']), MessageCodes.UNSUPPORTED_DATATYPE)
+                                                           'func', ['str', 'list', 'dict']),
+                                      MessageCodes.UNSUPPORTED_DATATYPE)
 
         return self._get_dataframe_aggregate(func)
 
+    @collect_queryband(arg_name="operation", prefix="DF")
     def _get_dataframe_aggregate(self, operation, **kwargs):
         """
         Returns the DataFrame given the aggregate operation or list of
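The agg() hunk above only validates the shape of "func": a single operation name, a list of names, or a dict mapping columns to names. A standalone mimic of that acceptance rule (a hypothetical helper, not part of teradataml):

    def validate_agg_func(func):
        # Same acceptance rule as the hunk above: str, list, or dict only.
        if not isinstance(func, (str, list, dict)):
            raise TypeError("func must be str, list, or dict, got {}".format(type(func).__name__))
        return func

    validate_agg_func('mean')                       # ok: one operation
    validate_agg_func(['min', 'max'])               # ok: several operations
    validate_agg_func({'gross': ['sum', 'mean']})   # ok: per-column operations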
@@ -5048,15 +5279,17 @@ class DataFrame():
                 # Return Empty DataFrame if all the columns are selected in groupby as parent has
                 if len(col_names) == 0:
                     aggregate_expression, new_column_names, new_column_types = \
-                        df_utils._construct_sql_expression_for_aggregations(self,
-                            groupby_col_names, groupby_col_types, operation,
-                            as_time_series_aggregate=is_time_series_aggregate, **kwargs)
+                        df_utils._construct_sql_expression_for_aggregations(self,
+                                                                            groupby_col_names, groupby_col_types, operation,
+                                                                            as_time_series_aggregate=is_time_series_aggregate,
+                                                                            **kwargs)
                     self._index_label = new_column_names
                 else:
                     aggregate_expression, new_column_names, new_column_types = \
-                        df_utils._construct_sql_expression_for_aggregations(self,
-                            col_names, col_types, operation,
-                            as_time_series_aggregate=is_time_series_aggregate, **kwargs)
+                        df_utils._construct_sql_expression_for_aggregations(self,
+                                                                            col_names, col_types, operation,
+                                                                            as_time_series_aggregate=is_time_series_aggregate,
+                                                                            **kwargs)
                     new_column_names = pti_default_cols_proj + groupby_col_names + new_column_names
                     new_column_types = pti_default_cols_types + groupby_col_types + new_column_types
 
@@ -5069,7 +5302,7 @@ class DataFrame():
             new_metaexpr = UtilFuncs._get_metaexpr_using_columns(aggregate_node_id,
                                                                  zip(new_column_names,
                                                                      new_column_types))
-            agg_df = self._create_dataframe_from_node\
+            agg_df = self._create_dataframe_from_node \
                 (aggregate_node_id, new_metaexpr, self._index_label)
 
             if (operation in ["sum", "csum", "mean"] and self._check_numeric_overflow(agg_df)):
@@ -5081,7 +5314,7 @@ class DataFrame():
                                                                drop_columns=kwargs.get("drop_columns"))
                 else:
                     agg_df = getattr(promoted_df, operation)(distinct=kwargs.get("distinct"))
-
+
             return agg_df
 
         except TeradataMlException:
@@ -5089,7 +5322,7 @@ class DataFrame():
         except Exception as err:
             raise TeradataMlException(Messages.get_message(
                 MessageCodes.EXECUTION_FAILED, "perform {} on DataFrame".format(operation), str(err)),
-                MessageCodes.EXECUTION_FAILED)
+                                      MessageCodes.EXECUTION_FAILED) from err
 
     def _check_numeric_overflow(self, result_df):
         """
@@ -5110,7 +5343,7 @@ class DataFrame():
 
        EXAMPLES :
            result = self._check_numeric_overflow(agg_df)
-       """
+        """
         try:
             repr(result_df)
             return False
@@ -5119,7 +5352,7 @@ class DataFrame():
                 return True
             else:
                 raise tme
-
+
     def _promote_dataframe_types(self):
         """
         Function promotes numeric column type to higher type to avoid numeric overflow
@@ -5152,9 +5385,8 @@ class DataFrame():
                 new_cols[col] = self[col].cast(type_=next_type[self[col].type.__class__])
             else:
                 new_cols[col] = self[col]
-
+
         return self.assign(True, **new_cols)
-
 
     def __repr__(self):
         """
@@ -5179,16 +5411,7 @@ class DataFrame():
         """
         try:
 
-
-            self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
-
-            query = repr(self._metaexpr) + ' FROM ' + self._table_name
-
-            if self._orderby is not None:
-                query += ' ORDER BY ' + self._orderby
-
-            # Execute the query and get the results in a list and create a Pandas DataFrame from the same.
-            data, columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
+            data, columns = self.__get_data_columns()
             pandas_df = pd.DataFrame.from_records(data, columns=columns, coerce_float=True)
 
             if self._index_label:
@@ -5196,7 +5419,7 @@ class DataFrame():
 
             if self._undropped_index is not None:
                 for col in self._undropped_index:
-                    pandas_df.insert(0, col, pandas_df.index.get_level_values(col).tolist(), allow_duplicates=True)
+                    pandas_df.insert(0, col, pandas_df.index.get_level_values(col).tolist(), allow_duplicates=True)
 
             return pandas_df.to_string()
 
@@ -5209,18 +5432,15 @@ class DataFrame():
 
     def _repr_html_(self):
         """ Print method for teradataml for iPython rich display. """
-        # Generate/Execute AED nodes
-        self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
-
-        query = repr(self._metaexpr) + ' FROM ' + self._table_name
 
-        if self._orderby is not None:
-            query += ' ORDER BY ' + self._orderby
-
-        # Execute the query and get the results in a list.
-        data, columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
+        # Check if class attributes __data and __data_columns are not None.
+        # If not None, reuse the data and columns.
+        # If None, generate latest results.
+        if self.__data is None and self.__data_columns is None:
+            self.__get_data_columns()
 
-        # Generate the HTML content from
+        # Generate the HTML content from the class attributes __data and __data_columns
+        # which are updated by _repr_() function call which always executes before _repr_html_().
         indent = "\t"
         dindent = indent + indent
 
@@ -5233,11 +5453,11 @@ class DataFrame():
         html = "\n{0}".format(indent).join(header_html)
         html += '<html><table>\n{0}<tr id="HeaderRow">\n'.format(indent)
 
-        columns_html = "</th>\n{0}<th>".format(dindent).join(columns)
+        columns_html = "</th>\n{0}<th>".format(dindent).join(self.__data_columns)
         html += "{0}<th>{1}</th>\n".format(dindent, columns_html)
         html += "{0}</tr>\n".format(indent)
 
-        for row in data:
+        for row in self.__data:
            row_html = ["{0}<td>{1}</td>\n".format(dindent,
                                                   cell) for cell in row]
            html += "{1}<tr>\n{0}{1}</tr>\n".format("".join(row_html), indent)
@@ -5246,6 +5466,34 @@ class DataFrame():
 
         return html
 
+    def __get_data_columns(self):
+        """
+        DESCRIPTION:
+            Internal function to execute the node and get the result.
+
+        RETURNS:
+            tuple, first element represents data for the underlying query
+            and second element represents the column names.
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            self.__get_data_columns()
+        """
+        self.__execute_node_and_set_table_name(self._nodeid, self._metaexpr)
+
+        query = repr(self._metaexpr) + ' FROM ' + self._table_name
+
+        if self._orderby is not None:
+            query += ' ORDER BY ' + self._orderby
+
+        # Execute the query and get the results in a list.
+        self.__data, self.__data_columns = UtilFuncs._execute_query(query=query, fetchWarnings=True)
+
+        return self.__data, self.__data_columns
+
+    @collect_queryband(queryband="DF_select")
     def select(self, select_expression):
         """
         DESCRIPTION:
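__repr__ and _repr_html_ now share one fetch path: __get_data_columns() stores the rows and column names on the instance, and _repr_html_ reuses them instead of re-running the query. A simplified standalone sketch of that reuse pattern (class and method names here are illustrative, not teradataml's):

    class ReprCache:
        def __init__(self, run_query):
            self._run_query = run_query      # callable returning (data, columns)
            self._data = None
            self._columns = None

        def get_data_columns(self):
            # Always refreshes, mirroring __get_data_columns above.
            self._data, self._columns = self._run_query()
            return self._data, self._columns

        def repr_html(self):
            # Reuse cached results when present, as _repr_html_ does.
            if self._data is None and self._columns is None:
                self.get_data_columns()
            return "<table>{} x {}</table>".format(len(self._data), len(self._columns))

    cache = ReprCache(lambda: ([[1, "a"], [2, "b"]], ["id", "val"]))
    print(cache.repr_html())   # runs the "query" once, then renders from cache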
@@ -5340,7 +5588,8 @@ class DataFrame():
         """
         try:
             if self._metaexpr is None:
-                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR), MessageCodes.TDMLDF_INFO_ERROR)
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR),
+                                          MessageCodes.TDMLDF_INFO_ERROR)
 
             # If invalid, appropriate exception raised; Processing ahead only for valid expressions
             select_exp_col_list = self.__validate_select_expression(select_expression)
@@ -5398,17 +5647,20 @@ class DataFrame():
 
         # TODO: Remove this check when same column multiple selection enabled
         if len(select_exp_col_list) > len(df_column_list):
-            raise TeradataMlException(
-                Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)), MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)),
+                MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
 
-        all_cols_exist = all(col in df_column_list for col in select_exp_col_list)
+        all_cols_exist = all(col in df_column_list for col in select_exp_col_list)
 
         if not all_cols_exist:
-            raise TeradataMlException(
-                Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)), MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.TDMLDF_SELECT_INVALID_COLUMN, ', '.join(df_column_list)),
+                MessageCodes.TDMLDF_SELECT_INVALID_COLUMN)
 
         return select_exp_col_list
 
+    @collect_queryband(queryband="DF_toPandas")
     def to_pandas(self, index_column=None, num_rows=99999, all_rows=False,
                   fastexport=False, catch_errors_warnings=False, **kwargs):
         """
@@ -5798,7 +6050,7 @@ class DataFrame():
                                                    "Pandas DataFrame", str(err)),
                                       MessageCodes.DATA_EXPORT_FAILED)
 
-    @
+    @collect_queryband(queryband="DF_join")
     def join(self, other, on=None, how="left", lsuffix=None, rsuffix=None,
              lprefix=None, rprefix=None):
         """
@@ -5875,18 +6127,12 @@ class DataFrame():
             lsuffix:
                 Optional Argument.
                 Specifies the suffix to be added to the left table columns.
-                Note:
-                    Behavior of the argument will change in future.
-                    Use "lprefix" instead.
                 Default Value: None.
                 Types: str
 
             rsuffix:
                 Optional Argument.
                 Specifies the suffix to be added to the right table columns.
-                Note:
-                    Behavior of the argument will change in future.
-                    Use "rprefix" instead.
                 Default Value: None.
                 Types: str
 
@@ -6045,15 +6291,13 @@ class DataFrame():
         # Validate argument types
         _Validators._validate_function_arguments(awu_matrix)
 
-
-
         # If user has not provided suffix argument(s), then prefix argument(s) value(s) are passed by
         # user hence we will set the affix variables (laffix and raffix) with provided value(s).
         # affix_type is also set appropriately.
         if lsuffix is not None or rsuffix is not None:
             laffix = lsuffix
             raffix = rsuffix
-            affix_type = "suffix"
+            affix_type = "suffix"
         else:
             laffix = lprefix
             raffix = rprefix
@@ -6079,12 +6323,13 @@ class DataFrame():
             if column in other_columns_lower_actual_map.keys():
                 if laffix is None and raffix is None:
                     raise TeradataMlException(
-                        Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),
-                        MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
+                        Messages.get_message(MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS),
+                        MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS)
+
                 # Both affixes should not be equal to perform join.
                 if laffix == raffix and laffix is not None:
                     raise TeradataMlException(
-                        Messages.get_message(MessageCodes.TDMLDF_INVALID_TABLE_ALIAS,
+                        Messages.get_message(MessageCodes.TDMLDF_INVALID_TABLE_ALIAS,
                                              "'l{affix_type}' and 'r{affix_type}'".format(affix_type=affix_type)),
                         MessageCodes.TDMLDF_INVALID_TABLE_ALIAS)
 
@@ -6120,7 +6365,7 @@ class DataFrame():
 
         if isinstance(ori_condition, str):
             columns = [column.strip() for column in condition.split(sep=conditional_separator)
-                       if len(column) > 0]
+                       if len(column) > 0]
 
             if len(columns) != 2:
                 invalid_join_conditions.append(condition)
@@ -6135,7 +6380,8 @@ class DataFrame():
 
         if len(invalid_join_conditions) > 0:
             raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_INVALID_JOIN_CONDITION,
-                                                           ", ".join(invalid_join_conditions)), MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
+                                                           ", ".join(invalid_join_conditions)),
+                                      MessageCodes.TDMLDF_INVALID_JOIN_CONDITION)
 
         join_condition = " and ".join(all_join_conditions)
     else:
@@ -6156,14 +6402,16 @@ class DataFrame():
                 df1_column_with_affix = self.__check_and_return_new_column_name(laffix, other_column,
                                                                                 other_columns_lower_actual_map.keys(),
                                                                                 "right", affix_type)
-                select_columns.append("{0} as {1}".format(
-                    self.__get_fully_qualified_col_name(other_column, "df1" if laffix is None else laffix), df1_column_with_affix))
+                select_columns.append("{0} as {1}".format(
+                    self.__get_fully_qualified_col_name(other_column, "df1" if laffix is None else laffix),
+                    df1_column_with_affix))
 
                 df2_column_with_affix = self.__check_and_return_new_column_name(raffix, column,
                                                                                 self_columns_lower_actual_map.keys(),
                                                                                 "left", affix_type)
-                select_columns.append("{0} as {1}".format(
-                    self.__get_fully_qualified_col_name(column, "df2" if raffix is None else raffix), df2_column_with_affix))
+                select_columns.append("{0} as {1}".format(
+                    self.__get_fully_qualified_col_name(column, "df2" if raffix is None else raffix),
+                    df2_column_with_affix))
 
                 # As we are creating new column name, adding it to new metadata dict for new dataframe from join.
                 self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
@@ -6173,7 +6421,7 @@ class DataFrame():
                 self.__add_column_type_item_to_dict(new_metaexpr_columns_types,
                                                     UtilFuncs._teradata_unquote_arg(df2_column_with_affix, "\""),
                                                     other_column, df2_columns_types)
-
+
             else:
                 # As column not present in right DataFrame, directly adding column to new metadata dict.
                 self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column, column, df1_columns_types)
@@ -6187,7 +6435,7 @@ class DataFrame():
 
         # Create a node in AED using _aed_join
         join_node_id = self._aed_utils._aed_join(self._nodeid, other._nodeid, ", ".join(select_columns), how_lc,
-                                                 join_condition, "df1" if laffix is None else laffix,
+                                                 join_condition, "df1" if laffix is None else laffix,
                                                  "df2" if raffix is None else raffix)
 
         # Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid and underlying table name.
@@ -6274,9 +6522,7 @@ class DataFrame():
         """
         if affix is None:
             return UtilFuncs._teradata_quote_arg(column, "\"", False)
-
-        affix_type = "prefix" # TODO: [ELE-5480] Remove this line to enable suffix addition.
-
+
         # If Prefix, affix is added before column name else it is appended.
         df1_column_with_affix = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
         df1_column_with_affix = df1_column_with_affix.format(affix,
@@ -6292,7 +6538,7 @@ class DataFrame():
                                                              MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS)
         return UtilFuncs._teradata_quote_arg(df1_column_with_affix, "\"", False)
 
-    def __add_column_type_item_to_dict(self, new_metadata_dict, new_column,column, column_types):
+    def __add_column_type_item_to_dict(self, new_metadata_dict, new_column, column, column_types):
         """
         Add a column as key and datatype as a value to dictionary
 
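The join changes above route duplicate column names through an affix: a prefix is glued before the name and a suffix after it ("{0}_{1}" vs "{1}_{0}" in __check_and_return_new_column_name, with the suffix path newly enabled by removing the ELE-5480 override). A runnable restatement of just that naming rule:

    def affixed_name(column, affix, affix_type):
        # prefix: "<affix>_<column>"; suffix: "<column>_<affix>".
        if affix is None:
            return column
        template = "{0}_{1}" if affix_type == "prefix" else "{1}_{0}"
        return template.format(affix, column)

    print(affixed_name("id", "t1", "prefix"))   # t1_id
    print(affixed_name("id", "t1", "suffix"))   # id_t1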
@@ -6355,20 +6601,20 @@ class DataFrame():
                     return final
                 else:
                     return colnames_list
-
+
             elif kind == 'mergesort':
                 if ascending == True:
                     return sorted(colnames_list)
                 else:
-                    return sorted(colnames_list, reverse=True)
-
+                    return sorted(colnames_list, reverse=True)
+
             elif kind == 'heapsort':
-                end = len(colnames_list)
+                end = len(colnames_list)
                 start = end // 2 - 1
-                for i in range(start, -1, -1):
-                    self.__get_heap(colnames_list, end, i)
-                for i in range(end-1, 0, -1):
-                    #swap(i, 0)
+                for i in range(start, -1, -1):
+                    self.__get_heap(colnames_list, end, i)
+                for i in range(end - 1, 0, -1):
+                    # swap(i, 0)
                     colnames_list[i], colnames_list[0] = colnames_list[0], colnames_list[i]
                     colnames_list = self.__get_heap(colnames_list, i, 0)
                 if ascending == True:
@@ -6394,9 +6640,9 @@ class DataFrame():
         RETURNS:
             Sorted list of column names indexed at i
         """
-        l=2 * i + 1
-        r=2 * (i + 1)
-        max=i
+        l = 2 * i + 1
+        r = 2 * (i + 1)
+        max = i
         if l < n and colnames_list[i] < colnames_list[l]:
             max = l
         if r < n and colnames_list[max] < colnames_list[r]:
@@ -6406,7 +6652,8 @@ class DataFrame():
             self.__get_heap(colnames_list, n, max)
         return colnames_list
 
-    def to_sql(self, table_name, if_exists='fail', primary_index=None, temporary=False, schema_name=None, types=None,
+    @collect_queryband(queryband="DF_toSql")
+    def to_sql(self, table_name, if_exists='fail', primary_index=None, temporary=False, schema_name=None, types=None,
                primary_time_index_name=None, timecode_column=None, timebucket_duration=None,
                timezero_date=None, columns_list=None, sequence_column=None, seq_max=None, set_table=False):
         """
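The 'heapsort' branch reformatted above implements a textbook max-heap sort over column names, with __get_heap as the sift-down step. An equivalent standalone version, for reference:

    def sift_down(items, n, i):
        # Mirrors __get_heap: restore the max-heap property for items[:n]
        # starting at index i.
        l, r, largest = 2 * i + 1, 2 * (i + 1), i
        if l < n and items[largest] < items[l]:
            largest = l
        if r < n and items[largest] < items[r]:
            largest = r
        if largest != i:
            items[i], items[largest] = items[largest], items[i]
            sift_down(items, n, largest)

    def heapsort(items):
        n = len(items)
        for i in range(n // 2 - 1, -1, -1):   # build the heap
            sift_down(items, n, i)
        for i in range(n - 1, 0, -1):         # repeatedly pop the max
            items[i], items[0] = items[0], items[i]
            sift_down(items, i, 0)
        return items

    print(heapsort(["b", "c", "a"]))   # ['a', 'b', 'c']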
@@ -6619,13 +6866,13 @@ class DataFrame():
 
         """
 
-        return copy_to_sql(df=self, table_name=table_name, schema_name=schema_name,
-                index=False, index_label=None, temporary=temporary,
-                primary_index=primary_index, if_exists=if_exists, types=types,
-                primary_time_index_name=primary_time_index_name, timecode_column=timecode_column,
-                timebucket_duration=timebucket_duration, timezero_date=timezero_date,
-                columns_list=columns_list,
-                sequence_column=sequence_column, seq_max=seq_max, set_table=set_table)
+        return copy_to_sql(df=self, table_name=table_name, schema_name=schema_name,
+                           index=False, index_label=None, temporary=temporary,
+                           primary_index=primary_index, if_exists=if_exists, types=types,
+                           primary_time_index_name=primary_time_index_name, timecode_column=timecode_column,
+                           timebucket_duration=timebucket_duration, timezero_date=timezero_date,
+                           columns_list=columns_list,
+                           sequence_column=sequence_column, seq_max=seq_max, set_table=set_table)
 
     def _get_assign_allowed_types(self):
         """
@@ -6694,7 +6941,7 @@ class DataFrame():
             new_meta = UtilFuncs._get_metaexpr_using_parent_metaexpr(new_nodeid, new_meta)
         return (new_meta, new_nodeid)
 
-    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
+    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
         """
         DESCRIPTION:
             Function to create a teradataml DataFrame from node.
@@ -6702,7 +6949,7 @@ class DataFrame():
             be overridden by the child classes if required.
 
             For example,
-            This will always return a teradataml DataFrame, but for
+            This will always return a teradataml DataFrame, but for
             GeoDataFrame, we will return teradataml DataFrame or teradataml
             GeoDataFrame, based on whether the resultant DataFrame contains
             geometry column or not.
@@ -6786,7 +7033,8 @@ class DataFrame():
             self.__execute_node_and_set_table_name(self._nodeid)
             return True
 
-    def assign(self, drop_columns=False, **kwargs):
+    @collect_queryband(queryband="DF_assign")
+    def assign(self, drop_columns=False, **kwargs):
         """
         DESCRIPTION:
             Assign new columns to a teradataml DataFrame.
@@ -7127,12 +7375,12 @@ class DataFrame():
             is_allowed = lambda x: isinstance(*x) and type(x[0]) != bool
             value_type_allowed = map(is_allowed, ((val, t) for t in allowed_types))
 
-            #if callable(val):
+            # if callable(val):
             #     err = 'Unsupported callable value for key: {}'.format(key)
             #     raise ValueError(err)
 
             if not any(list(value_type_allowed)):
-                err = 'Unsupported values of type {t} for key {k}'.format(k=key, t=type(val))
+                err = 'Unsupported values of type {t} for key {k}'.format(k=key, t=type(val))
                 raise ValueError(err)
 
             if isinstance(val, ClauseElement) and not node_executed:
@@ -7153,6 +7401,7 @@ class DataFrame():
                 msg = Messages.get_message(MessageCodes.TDMLDF_INFO_ERROR)
                 raise TeradataMlException(msg, errcode) from err
 
+    @collect_queryband(queryband="DF_get")
     def get(self, key):
         """
         DESCRIPTION:
@@ -7232,7 +7481,8 @@ class DataFrame():
         """
         return self.select(key)
 
-    def set_index(self, keys, drop=True, append=False):
+    @collect_queryband(queryband="DF_setIndex")
+    def set_index(self, keys, drop=True, append=False):
         """
         DESCRIPTION:
             Assigns one or more existing columns as the new index to a teradataml DataFrame.
@@ -7447,7 +7697,8 @@ class DataFrame():
         """
         return self._index_label
 
-    def groupby(self, columns_expr):
+    @collect_queryband(queryband="DF_groupby")
+    def groupby(self, columns_expr, **kwargs):
         """
         DESCRIPTION:
             Applies GroupBy to one or more columns of a teradataml Dataframe.
@@ -7460,6 +7711,16 @@ class DataFrame():
                 Specifies the column name(s) to group by.
                 Types: str OR list of Strings (str)
 
+            kwargs:
+                Optional Argument.
+                Specifies keyword arguments.
+
+                option:
+                    Optional Argument.
+                    Specifies the groupby option.
+                    Permitted Values: "CUBE", "ROLLUP", None
+                    Types: str or NoneType
+
         NOTES:
             1. Users can still apply teradataml DataFrame methods (filters/sort/etc) on top of the result.
             2. Consecutive operations of grouping, i.e., groupby_time(), resample() and groupby() are not permitted.
@@ -7486,26 +7747,31 @@ class DataFrame():
 
         """
         # Argument validations
-        awu_matrix = []
-        awu_matrix.append(["columns_expr", columns_expr, False, (str, list), True])
+        arg_info_matrix = []
+        arg_info_matrix.append(["columns_expr", columns_expr, False, (str, list), True])
+        option = kwargs.get("option", None)
+        arg_info_matrix.append(["option", option, True, (str, type(None)), True,
+                                ["CUBE", "ROLLUP", None]])
 
         # Validate argument types
-        _Validators._validate_function_arguments(awu_matrix)
+        _Validators._validate_function_arguments(arg_info_matrix)
 
         # Checking each element in passed columns to be valid column in dataframe
         _Validators._validate_column_exists_in_dataframe(columns_expr, self._metaexpr)
 
         try:
-            column_list=[]
-            unsupported_types = ['BLOB', 'CLOB', 'PERIOD_DATE', 'PERIOD_TIME', 'PERIOD_TIMESTAMP', 'ARRAY', 'VARRAY',
-                                 'XML', 'JSON']
+            column_list = []
+            unsupported_types = ['BLOB', 'CLOB', 'PERIOD_DATE', 'PERIOD_TIME', 'PERIOD_TIMESTAMP', 'ARRAY', 'VARRAY',
+                                 'XML', 'JSON']
+            type_expr = []
             invalid_types = []
             # check for consecutive groupby operations
-            if isinstance(self, DataFrameGroupBy) or isinstance(self, DataFrameGroupByTime):
-                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_OPERATION), MessageCodes.UNSUPPORTED_OPERATION)
+            if isinstance(self, DataFrameGroupBy) or isinstance(self, DataFrameGroupByTime):
+                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_OPERATION),
+                                          MessageCodes.UNSUPPORTED_OPERATION)
 
             if (isinstance(columns_expr, list)):
-                column_list=columns_expr
+                column_list = columns_expr
 
             elif (isinstance(columns_expr, str)):
                 column_list.append(columns_expr)
@@ -7530,15 +7796,15 @@ class DataFrame():
 
             groupbyexpr = ', '.join(UtilFuncs._teradata_quote_arg(col, "\"", False) for col in column_list)
             groupbyObj = DataFrameGroupBy(self._nodeid, self._metaexpr, self._column_names_and_types, self.columns,
-                                          groupbyexpr, column_list)
+                                          groupbyexpr, column_list, option)
             return groupbyObj
         except TeradataMlException:
             raise
 
-    def __group_time_series_data(self, timebucket_duration, timebucket_duration_arg_name="timebucket_duration",
-            value_expression=None, timecode_column=None,
-            timecode_column_arg_name="timecode_column", sequence_column=None,
-            fill=None, fill_arg_name="fill"):
+    def __group_time_series_data(self, timebucket_duration, timebucket_duration_arg_name="timebucket_duration",
+                                 value_expression=None, timecode_column=None,
+                                 timecode_column_arg_name="timecode_column", sequence_column=None,
+                                 fill=None, fill_arg_name="fill"):
         """
         DESCRIPTION:
             Internal function to resample/group time series data using Group By Time and a column.
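groupby() now threads an optional "option" keyword, validated against CUBE and ROLLUP, into DataFrameGroupBy. Hypothetical usage, assuming an established teradataml connection and a DataFrame df with columns "region" and "product":

    # Plain GROUP BY, unchanged behavior.
    df.groupby(["region", "product"]).sum()

    # New in this release: CUBE and ROLLUP grouping sets.
    df.groupby(["region", "product"], option="CUBE").sum()
    df.groupby(["region", "product"], option="ROLLUP").sum()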
@@ -7782,7 +8048,8 @@ class DataFrame():
 
             if len(invalid_types) > 0:
                 raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, invalid_types,
-                                                               "ANY, except following {}".format(unsupported_types)),
+                                                               "ANY, except following {}".format(
+                                                                   unsupported_types)),
                                           MessageCodes.UNSUPPORTED_DATATYPE)
 
             groupby_column_expr = ', '.join(UtilFuncs._teradata_quote_arg(col, "\"", False)
@@ -7792,7 +8059,7 @@ class DataFrame():
 
             groupbyObj = DataFrameGroupByTime(nodeid=self._nodeid, metaexpr=self._metaexpr,
                                               column_names_and_types=self._column_names_and_types, columns=self.columns,
-                                              groupby_value_expr=groupby_column_expr,
+                                              groupby_value_expr=groupby_column_expr,
                                               column_list=group_by_column_list, timebucket_duration=timebucket_duration,
                                               value_expression=value_expression, timecode_column=timecode_column,
                                               sequence_column=sequence_column, fill=fill)
@@ -7800,8 +8067,9 @@ class DataFrame():
         except TeradataMlException:
             raise
 
-    def groupby_time(self, timebucket_duration, value_expression=None, timecode_column=None, sequence_column=None,
-            fill=None):
+    @collect_queryband(queryband="DF_groupbyTime")
+    def groupby_time(self, timebucket_duration, value_expression=None, timecode_column=None, sequence_column=None,
+                     fill=None):
         """
         DESCRIPTION:
             Apply Group By Time to one or more columns of a teradataml DataFrame.
@@ -8079,11 +8347,12 @@ class DataFrame():
 
         """
         return self.__group_time_series_data(timebucket_duration=timebucket_duration, value_expression=value_expression,
-                                             timecode_column=timecode_column, sequence_column=sequence_column,
-                                             fill=fill)
+                                             timecode_column=timecode_column, sequence_column=sequence_column,
+                                             fill=fill)
 
-    def resample(self, rule, value_expression=None, on=None, sequence_column=None,
-            fill_method=None):
+    @collect_queryband(queryband="DF_resample")
+    def resample(self, rule, value_expression=None, on=None, sequence_column=None,
+                 fill_method=None):
         """
         DESCRIPTION:
             Resample time series data. This function allows grouping done by time on
@@ -8360,10 +8629,11 @@ class DataFrame():
         """
         return self.__group_time_series_data(timebucket_duration=rule, timebucket_duration_arg_name="rule",
                                              value_expression=value_expression, timecode_column_arg_name="on",
-                                             timecode_column=on, sequence_column=sequence_column,
-                                             fill=fill_method, fill_arg_name="fill_method")
+                                             timecode_column=on, sequence_column=sequence_column,
+                                             fill=fill_method, fill_arg_name="fill_method")
 
-    def get_values(self, num_rows=99999):
+    @collect_queryband(queryband="DF_getValues")
+    def get_values(self, num_rows=99999):
         """
         DESCRIPTION:
             Retrieves all values (only) present in a teradataml DataFrame.
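As the hunks above show, groupby_time() and resample() are both thin wrappers over __group_time_series_data; resample() merely renames its pandas-flavored arguments before forwarding. A sketch of that mapping (the helper name is illustrative, not teradataml's):

    def resample_to_groupby_time(rule, on=None, fill_method=None, **kwargs):
        # rule -> timebucket_duration, on -> timecode_column, fill_method -> fill.
        kwargs.update(timebucket_duration=rule, timecode_column=on, fill=fill_method)
        return kwargs

    print(resample_to_groupby_time("10m", on="ts", fill_method="PREV"))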
@@ -8548,6 +8818,7 @@ class DataFrame():
         dimension = self.shape
         return dimension[0] * dimension[1]
 
+    @collect_queryband(queryband="DF_merge")
     def merge(self, right, on=None, how="inner", left_on=None, right_on=None, use_index=False,
               lsuffix=None, rsuffix=None):
         """
@@ -8555,6 +8826,7 @@ class DataFrame():
             Merges two teradataml DataFrames together.
 
             Supported merge operations are:
+                - cross: Returns cartesian product between the two dataframes.
                 - inner: Returns only matching rows, non-matching rows are eliminated.
                 - left: Returns all matching rows plus non-matching rows from the left teradataml DataFrame.
                 - right: Returns all matching rows plus non-matching rows from the right teradataml DataFrame.
@@ -8767,27 +9039,26 @@ class DataFrame():
                 Messages.get_message(MessageCodes.MUST_PASS_ARGUMENT, "left_on", "right_on"),
                 MessageCodes.MUST_PASS_ARGUMENT)
 
-        if isinstance(on,list):
+        if isinstance(on, list):
             join_conditions = on
         elif isinstance(on, (str, ColumnExpression)):
             join_conditions = [on]
         else:
             join_conditions = []
 
-
         if isinstance(left_on, list) and isinstance(right_on, list) and len(left_on) != len(right_on):
             raise TeradataMlException(
-                    Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
-                    MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
+                Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
+                MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
 
         elif isinstance(left_on, list) and isinstance(right_on, (str, ColumnExpression)) and len(left_on) != 1:
             raise TeradataMlException(
-                    Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
-                    MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
+                Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
+                MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
 
         elif isinstance(right_on, list) and isinstance(left_on, (str, ColumnExpression)) and len(right_on) != 1:
             raise TeradataMlException(
-                    Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
+                Messages.get_message(MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS, "left_on", "right_on"),
                 MessageCodes.TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS)
 
         if left_on is not None and not isinstance(left_on, list):
@@ -8812,7 +9083,6 @@ class DataFrame():
             if isinstance(right_on[index], ColumnExpression):
                 right_on[index] = right_on[index].compile()
 
-
         if left_on is not None and right_on is not None:
             for left_column, right_column in zip(left_on, right_on):
                 join_conditions.append("{} = {}".format(tdp.quote(left_column), tdp.quote(right_column)))
@@ -8828,7 +9098,7 @@ class DataFrame():
 
         if use_index:
             if self._index_label is None or right._index_label is None:
-                raise TeradataMlException(
+                raise TeradataMlException(
                     Messages.get_message(MessageCodes.TDMLDF_INDEXES_ARE_NONE), MessageCodes.TDMLDF_INDEXES_ARE_NONE)
 
             left_index_labels = self._index_label
@@ -8841,9 +9111,9 @@ class DataFrame():
             for left_index_label, right_index_label in zip(left_index_labels, right_index_labels):
                 join_conditions.append("{} = {}".format(tdp.quote(left_index_label), tdp.quote(right_index_label)))
 
-
         return self.join(other=right, on=join_conditions, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
 
+    @collect_queryband(queryband="DF_squeeze")
     def squeeze(self, axis=None):
         """
         DESCRIPTION:
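merge() normalizes its "on" argument before building join conditions, exactly as the branch above shows: lists pass through, a single column or expression is wrapped, anything else starts empty. Restated as a runnable helper (only the str case is modeled here; the real code also accepts ColumnExpression):

    def normalize_on(on):
        # list -> as-is; single condition -> one-element list; None -> empty.
        if isinstance(on, list):
            return on
        if isinstance(on, str):
            return [on]
        return []

    print(normalize_on("a = b"))        # ['a = b']
    print(normalize_on(["a", "b"]))     # ['a', 'b']
    print(normalize_on(None))           # []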
@@ -8955,7 +9225,7 @@ class DataFrame():
         num_row, num_col = self.shape
 
         # Check if the number of elements in DF = 1
-        if (num_row, num_col) == (1,1) and axis is None:
+        if (num_row, num_col) == (1, 1) and axis is None:
             # To get the single row/column value in the DF, we need to execute the node
             # Generate/Execute AED nodes
             self.__execute_node_and_set_table_name(self._nodeid)
@@ -8981,12 +9251,13 @@ class DataFrame():
             return self
 
         if axis == 1:
-            return Series._from_dataframe(self, axis=1)
+            return Series._from_dataframe(self, axis=1)
         else:
             # TODO : Research and add capabilities to handle rowexpression based return objects
             # For now, returning the DataFrame as is
             return self
 
+    @collect_queryband(queryband="DF_sortIndex")
     def sort_index(self, axis=0, ascending=True, kind='quicksort'):
         """
         DESCRIPTION:
@@ -9093,6 +9364,7 @@ class DataFrame():
         except TeradataMlException:
             raise
 
+    @collect_queryband(queryband="DF_concat")
     def concat(self, other, join='OUTER', allow_duplicates=True, sort=False, ignore_index=False):
         """
         DESCRIPTION:
@@ -9278,14 +9550,14 @@ class DataFrame():
         if isinstance(samples, float) and samples > 1:
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
-                                     "greater than 0 and less than or equal to 1"),
-                MessageCodes.INVALID_ARG_VALUE)
+                                     "greater than 0 and less than or equal to 1"),
+                MessageCodes.INVALID_ARG_VALUE)
         if isinstance(samples, list) and all(isinstance(item, float) for item in samples) \
-            and sum(samples) > 1:
+                and sum(samples) > 1:
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
-                                     "a list having sum of all elements greater than 0 and less than or equal to 1"),
-                MessageCodes.INVALID_ARG_VALUE)
+                                     "a list having sum of all elements greater than 0 and less than or equal to 1"),
+                MessageCodes.INVALID_ARG_VALUE)
 
         return True
 
@@ -9316,10 +9588,10 @@ class DataFrame():
 
         # Raise exception if the length of list is greater than 16.
         if len(samples) > 16:
-            raise TeradataMlException(
+            raise TeradataMlException(
                 Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
-                                     "a list having less than or equal to 16 samples"),
-                MessageCodes.INVALID_ARG_VALUE)
+                                     "a list having less than or equal to 16 samples"),
+                MessageCodes.INVALID_ARG_VALUE)
 
         return True
 
@@ -9354,26 +9626,26 @@ class DataFrame():
 
         # Raise exception if number of rows given are negative.
         if isinstance(samples, (int, float)) and samples < 0 or isinstance(samples, list) \
-            and any(item < 0 for item in samples):
+                and any(item < 0 for item in samples):
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
-                                     "greater than 0"),
-                MessageCodes.INVALID_ARG_VALUE)
+                                     "greater than 0"),
+                MessageCodes.INVALID_ARG_VALUE)
 
         # Raise exception if fractions specified as 0.
-        if isinstance(samples, float) and samples == 0 or (isinstance(samples, list) \
-            and all(isinstance(item, float) for item in samples)
-            and any(item == 0 for item in samples)):
+        if isinstance(samples, float) and samples == 0 or (isinstance(samples, list) \
+                and all(isinstance(item, float) for item in samples)
+                and any(item == 0 for item in samples)):
             raise TeradataMlException(
                 Messages.get_message(MessageCodes.INVALID_ARG_VALUE, str(samples), arg_name,
-                                     "greater than 0"),
-                MessageCodes.INVALID_ARG_VALUE
-                )
+                                     "greater than 0"),
+                MessageCodes.INVALID_ARG_VALUE)
 
         return True
 
-    def sample(self, n=None, frac=None, replace=False, randomize=False, case_when_then=None,
-               case_else=None):
+    @collect_queryband(queryband="DF_sample")
+    def sample(self, n=None, frac=None, replace=False, randomize=False, case_when_then=None, case_else=None,
+               stratify_column=None, seed=None, id_column=None):
         """
         DESCRIPTION:
             Allows to sample few rows from dataframe directly or based on conditions.
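The three validators above enforce sample()'s fraction rules: at most 16 samples, every fraction strictly positive, and the fractions summing to at most 1. Condensed into one runnable check (a hypothetical helper, not the library's own):

    def validate_fractions(fracs):
        if len(fracs) > 16:
            raise ValueError("a list having less than or equal to 16 samples is required")
        if any(f <= 0 for f in fracs):
            raise ValueError("each fraction must be greater than 0")
        if sum(fracs) > 1:
            raise ValueError("sum of fractions must be greater than 0 and less than or equal to 1")
        return True

    print(validate_fractions([0.25, 0.75]))   # True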
@@ -9679,21 +9951,21 @@ class DataFrame():
         try:
             if n is not None and frac is not None:
                 raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
-                                                               "n", "frac"),
-                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
+                                                               "n", "frac"),
+                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
             if n is not None and case_when_then is not None:
                 raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
-                                                               "n", "case_when_then"),
-                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
+                                                               "n", "case_when_then"),
+                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
             if frac is not None and case_when_then is not None:
                 raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
-                                                               "frac", "case_when_then"),
-                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
+                                                               "frac", "case_when_then"),
+                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
             _Validators._validate_dependent_argument("case_else", case_else, "case_when_then", case_when_then)
             if n is None and frac is None and case_when_then is None:
-                raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
-                                                               "n or frac", "case_when_then"),
-                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
+                raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
+                                                               "n or frac", "case_when_then"),
+                                          MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
 
             # Argument validations
             awu_matrix = []
@@ -9739,7 +10011,7 @@ class DataFrame():
                 self.__validate_sum_of_list_for_sample_api(frac, "frac")
                 self.__validate_number_of_rows_for_sample_api(frac, "frac")
                 case_when_then = {}
-                list_of_fracs=frac
+                list_of_fracs = frac
 
             # When stratify column is passed for sample then perform TrainTestSplit
             # for data sampling.
@@ -9750,9 +10022,9 @@ class DataFrame():
                 # For statify column Train Test split size must sum up to 1.
                 if len(list_of_fracs) == 1:
                     list_of_fracs.append(1 - list_of_fracs[0])
-
+
                 # Call TrainTestSplit and return the result dataframe.
-                TrainTestSplit_out = TrainTestSplit(data=self,
+                TrainTestSplit_out = TrainTestSplit(data=self,
                                                     id_column=id_column,
                                                     train_size=list_of_fracs[0],
                                                     test_size=list_of_fracs[1],
@@ -9762,11 +10034,11 @@ class DataFrame():
                 # for backward compatibility.
                 _sampled_df = TrainTestSplit_out.result
                 # Column name "TD_IsTrainRow" renamed to "sampleid".
-                return _sampled_df.assign(sampleid=case([
-                    (_sampled_df.TD_IsTrainRow == 0, 2)],
-                    else_=1)).drop("TD_IsTrainRow", axis=1)
-
-
+                return _sampled_df.assign(sampleid=case([
+                                          (_sampled_df.TD_IsTrainRow == 0, 2)],
+                                          else_=1)).drop("TD_IsTrainRow", axis=1)
+
+
 
             else:
                 # Creating OrderDict for 'case_when_then' so that order of keys doesn't change after
@@ -9774,8 +10046,8 @@ class DataFrame():
                 case_when_then = OrderedDict(case_when_then)
                 if len(case_when_then) > 16:
                     raise TeradataMlException(
-                        Messages.get_message(MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES, "case_when_then"),
-                        MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES)
+                        Messages.get_message(MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES, "case_when_then"),
+                        MessageCodes.TDML_SAMPLE_INVALID_NUMBER_OF_SAMPLES)
 
                 transformed_case_when_then = OrderedDict()
                 for when_condition, then_sample_number in case_when_then.items():
@@ -9791,12 +10063,12 @@ class DataFrame():
 
                     # Validating values in the dict.
                     if isinstance(then_sample_number, int) or (isinstance(then_sample_number, list) \
-                        and isinstance(then_sample_number[0], int)):
+                            and isinstance(then_sample_number[0], int)):
                         _Validators._validate_function_arguments([["Values in case_when_then", then_sample_number,
-                                                                   True, (int, list)]])
+                                                                   True, (int, list)]])
                     else:
                         _Validators._validate_function_arguments([["Values in case_when_then", then_sample_number,
-                                                                   True, ((float, list))]])
+                                                                   True, ((float, list))]])
 
                     if isinstance(then_sample_number, list):
                         self.__validate_len_of_list_for_sample_api(then_sample_number, "case_when_then")
@@ -9818,11 +10090,11 @@ class DataFrame():
 
             case_else_awu_matrix = []
             if isinstance(case_else[0], int):
-                case_else_awu_matrix.append(['Number of rows or fractions in case_else',
-                                             case_else, True, (int, list)])
+                case_else_awu_matrix.append(['Number of rows or fractions in case_else',
+                                             case_else, True, (int, list)])
             else:
-                case_else_awu_matrix.append(['Number of rows or fractions in case_else',
-                                             case_else, True, (float, list)])
+                case_else_awu_matrix.append(['Number of rows or fractions in case_else',
+                                             case_else, True, (float, list)])
 
             # Validating argument values for 'case_else'.
             _Validators._validate_function_arguments(case_else_awu_matrix)
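When stratify_column is passed, sample() now delegates to TrainTestSplit and then relabels its TD_IsTrainRow flag as the historical sampleid column via the CASE expression above: test rows (flag 0) become sample 2, train rows become sample 1. That mapping in plain Python:

    def to_sampleid(td_is_train_row):
        # CASE WHEN TD_IsTrainRow = 0 THEN 2 ELSE 1 END
        return 2 if td_is_train_row == 0 else 1

    print([to_sampleid(flag) for flag in (1, 0, 1, 0)])   # [1, 2, 1, 2]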
@@ -9848,16 +10120,25 @@ class DataFrame():
             for column in self.columns:
                 self.__add_column_type_item_to_dict(new_metaexpr_columns_types, column,
                                                     column, df_columns_types)
-
+
             # As we are creating new column name, adding it to new metadata dict
             new_metaexpr_columns_types[sample_column] = INTEGER()
             sample_node_id = self._aed_utils._aed_sample(self._nodeid, ",".join(selected_columns),
                                                          list_of_fracs, replace, randomize, case_when_then, case_else_var)
-            column_info = ((col_name, col_type) for col_name, col_type in
+
+            column_info = ((col_name, col_type) for col_name, col_type in
                            new_metaexpr_columns_types.items())
             # Get new metaexpr for sample_node_id
             new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sample_node_id, column_info, is_persist=True)
-            return self._create_dataframe_from_node(sample_node_id, new_metaexpr, self._index_label)
+
+            # Make this non-lazy. Added this in order to fix https://teradata-pe.atlassian.net/browse/ELE-6368
+            # Cannot use __execute_node_and_set_table_name because self points to original df.
+            # Hence, setting the __table_name with _execute_node_return_db_object_name.
+
+            df = self._create_dataframe_from_node(sample_node_id, new_metaexpr, self._index_label)
+            df.__table_name = df_utils._execute_node_return_db_object_name(sample_node_id, new_metaexpr)
+
+            return df
 
         except TeradataMlException:
             raise
@@ -9873,7 +10154,8 @@ class DataFrame():
             msg = Messages.get_message(errcode)
             raise TeradataMlException(msg, errcode) from err
 
-    def show_query(self, full_query=False):
+    @collect_queryband(queryband="DF_showQuery")
+    def show_query(self, full_query=False):
         """
         DESCRIPTION:
             Function returns underlying SQL for the teradataml DataFrame. It is the same
@@ -10016,6 +10298,7 @@ class DataFrame():
             msg = Messages.get_message(errcode)
             raise TeradataMlException(msg, errcode) from err
 
+    @collect_queryband(queryband="DF_mapRow")
     def map_row(self,
                 user_function,
                 exec_mode='IN-DB',
@@ -10083,9 +10366,6 @@ class DataFrame():
                     * LOCAL: Execute the function locally on sample data (at
                              most "num_rows" rows) from the teradataml
                             DataFrame.
-                    * SANDBOX: Execute the function locally within a sandbox
-                               environment on sample data (at most "num_rows"
-                               rows) from the teradataml DataFrame.
                 Default value: 'IN-DB'
                 Types: str
 
@@ -10103,7 +10383,7 @@ class DataFrame():
                 Optional Argument.
                 Specifies the maximum number of sample rows to use from the
                 teradataml DataFrame to apply the user defined function to when
-                "exec_mode" is 'LOCAL'
+                "exec_mode" is 'LOCAL'.
                 Default value: 1000
                 Types: int
 
@@ -10383,6 +10663,7 @@ class DataFrame():
 
         return tbl_op_util.execute()
 
+    @collect_queryband(queryband="DF_mapPartition")
     def map_partition(self,
                       user_function,
                       exec_mode='IN-DB',
@@ -10454,9 +10735,6 @@ class DataFrame():
                     * LOCAL: Execute the function locally on sample data (at
                              most "num_rows" rows) from the teradataml
                              DataFrame.
-                    * SANDBOX: Execute the function locally within a sandbox
-                               environment on sample data (at most "num_rows"
-                               rows) from the teradataml DataFrame.
                 Default value: 'IN-DB'
                 Types: str
 
@@ -10474,7 +10752,7 @@ class DataFrame():
                 Optional Argument.
                 Specifies the maximum number of sample rows to use from the
                 teradataml DataFrame to apply the user defined function to when
-                "exec_mode" is 'LOCAL'
+                "exec_mode" is 'LOCAL'.
                 Default value: 1000
                 Types: int
 
@@ -10795,6 +11073,7 @@ class DataFrame():
 
         return tbl_op_util.execute()
 
+    @collect_queryband(queryband="DF_apply")
     def apply(self,
               user_function,
               exec_mode='REMOTE',
@@ -11148,8 +11427,8 @@ class DataFrame():
         # When returns argument is not specified, assume output schema
         # is same as input table schema.
         default_returns = OrderedDict(zip(self.columns,
-                                          [col.type for col in self._metaexpr.c]))
+                                          [col.type for col in
+                                           self._metaexpr.c]))
         returns = kwargs.pop('returns', default_returns)
         arg_info_matrix.append(["returns", returns, False, (dict)])
 
@@ -11194,6 +11473,7 @@ class DataFrame():
 
         return tbl_op_util.execute()
 
+    @collect_queryband(queryband="DF_window")
     def window(self,
                partition_columns=None,
                order_columns=None,
@@ -11240,7 +11520,7 @@ class DataFrame():
                        columns of a teradataml DataFrame.
                     3. "partition_columns" supports only columns specified in
                        groupby function, if window is initiated on DataFrameGroupBy.
-                Types: str OR list of Strings (str)
+                Types: str OR list of Strings (str) OR ColumnExpression OR list of ColumnExpressions
 
             order_columns:
                 Optional Argument.
@@ -11254,17 +11534,24 @@ class DataFrame():
                        columns of a teradataml DataFrame.
                     2. "order_columns" supports only columns specified in
                        groupby, if window is initiated on DataFrameGroupBy.
-                Types: str OR list of Strings (str)
-
+                    3. When ColumnExpression(s) is(are) passed to "order_columns", then the
+                       corresponding expression takes precedence over arguments
+                       "sort_ascending" and "nulls_first". Say, ColumnExpression is col1, then
+                       1. col1.asc() or col.desc() is effective irrespective of "sort_ascending".
+                       2. col1.nulls_first() or col.nulls_last() is effective irrespective of "nulls_first".
+                       3. Any combination of above two take precedence over "sort_ascending" and "nulls_first".
+                Types: str OR list of Strings (str) OR ColumnExpression OR list of ColumnExpressions
 
             sort_ascending:
                 Optional Argument.
                 Specifies whether column ordering should be in ascending or
                 descending order.
                 Default Value: True (ascending)
-                Note:
-                    When "order_columns" argument is not specified, this argument
-                    is ignored.
+                Notes:
+                     * When "order_columns" argument is not specified, this argument
+                       is ignored.
+                     * When ColumnExpression(s) is(are) passed to "order_columns", then the
+                       argument is ignored.
                 Types: bool
 
             nulls_first:
@@ -11272,9 +11559,11 @@ class DataFrame():
|
|
|
11272
11559
|
Specifies whether null results are to be listed first or last
|
|
11273
11560
|
or scattered.
|
|
11274
11561
|
Default Value: None
|
|
11275
|
-
|
|
11276
|
-
|
|
11277
|
-
|
|
11562
|
+
Notes:
|
|
11563
|
+
* When "order_columns" argument is not specified, this argument
|
|
11564
|
+
is ignored.
|
|
11565
|
+
* When "order_columns" is a ColumnExpression(s), this argument
|
|
11566
|
+
is ignored.
|
|
11278
11567
|
Types: bool
|
|
11279
11568
|
|
|
11280
11569
|
window_start_point:
|
|
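Taken together, the new notes establish a precedence rule: ordering directives attached to a ColumnExpression always win over the window-level "sort_ascending" and "nulls_first" flags. A doctest-style sketch of that precedence (the column names are illustrative):

    >>> # Window-level flags apply to plain string column names...
    >>> w1 = df.window(order_columns=["Feb"], sort_ascending=False, nulls_first=True)
    >>> # ...but are ignored once the ColumnExpression carries its own directives:
    >>> w2 = df.window(order_columns=[df.Feb.desc().nulls_last()],
    ...                sort_ascending=True, nulls_first=True)  # desc()/nulls_last() win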
@@ -11362,19 +11651,18 @@ class DataFrame():
             # between unbounded preceding and 3 preceding with
             # "partition_columns" and "order_columns" argument with
             # default sorting.
-            >>> window = df.window(partition_columns=
-            ...                    order_columns=[
+            >>> window = df.window(partition_columns=df.Feb,
+            ...                    order_columns=[df.Feb, "datetime"],
             ...                    window_start_point=None,
             ...                    window_end_point=-3)
             >>>
 
             # Example 3: Create a moving (rolling) window with rows between
-            # current row and 3 following with sorting done on 'Feb'
-            #
-            # "partition_columns" argument.
-            >>> window = df.window(partition_columns=
-            ...                    order_columns=[
-            ...                    sort_ascending=False,
+            # current row and 3 following with sorting done on 'Feb'
+            # in ascending order, 'datetime' columns in descending order
+            # and "partition_columns" argument.
+            >>> window = df.window(partition_columns=df.Feb,
+            ...                    order_columns=[df.Feb.asc(), df.datetime.desc()],
             ...                    window_start_point=0,
             ...                    window_end_point=3)
             >>>
@@ -11384,30 +11672,26 @@ class DataFrame():
             # sorting done on 'Feb', 'datetime' columns in ascending
             # order and NULL values in 'Feb', 'datetime'
             # columns appears at last.
-            >>> window = df.window(partition_columns=
-            ...                    order_columns=[
-            ...                    nulls_first=False,
+            >>> window = df.window(partition_columns=df.Feb,
+            ...                    order_columns=[df.Feb.nulls_first(), df.datetime.nulls_first()],
             ...                    window_start_point=0,
             ...                    window_end_point=None)
             >>>
 
             # Example 5: Create a grouping window, with sorting done on 'Feb',
-            # 'datetime' columns in ascending order
-            # in 'Feb'
+            # 'datetime' columns in ascending order with NULL values
+            # in 'Feb' column appearing first and in 'datetime' column
+            # appearing last.
             >>> window = df.window(partition_columns="Feb",
-            ...                    order_columns=[
-            ...                    sort_ascending=False,
-            ...                    nulls_first=False,
+            ...                    order_columns=[df.Feb.nulls_first(), df.datetime.nulls_last()],
             ...                    window_start_point=None,
             ...                    window_end_point=None)
             >>>
 
             # Example 6: Create a window on a teradataml DataFrame, which
             # ignores all the parameters while creating window.
-            >>> window = df.window(partition_columns=
-            ...                    order_columns=[
-            ...                    sort_ascending=False,
-            ...                    nulls_first=False,
+            >>> window = df.window(partition_columns=df.Feb,
+            ...                    order_columns=[df.Feb.desc().nulls_last(), df.datetime.desc().nulls_last()],
             ...                    ignore_window=True)
             >>>
 
@@ -11462,6 +11746,7 @@ class DataFrame():
                                              window_end_point=window_end_point,
                                              ignore_window=ignore_window)
 
+    @collect_queryband(queryband="DF_dropDuplicate")
     def drop_duplicate(self, column_names=None):
         """
         DESCRIPTION:
@@ -11535,6 +11820,7 @@ class DataFrame():
         new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items())
         return self._create_dataframe_from_node(sel_nodeid, new_metaexpr, self._index_label)
 
+    @collect_queryband(queryband="DF_toCsv")
     def to_csv(self, csv_file,
                num_rows=99999,
                all_rows=False,
@@ -11760,10 +12046,11 @@ class DataFrame():
                                  Messages.get_message(MessageCodes.DATA_EXPORT_FAILED, "to_csv",
                                                       "CSV file", str(err)),
                                  MessageCodes.DATA_EXPORT_FAILED)
-
+
+    @collect_queryband(queryband="DF_pivot")
     def pivot(self,
               columns=None,
-              aggfuncs=None,
+              aggfuncs=None,
               limit_combinations=False,
               margins=None,
              returns=None,
@@ -12110,7 +12397,7 @@ class DataFrame():
         if columns_arg_req and isinstance(columns, dict):
             expected_value_types = (int, float, str, _ListOf(int), _ListOf(float), _ListOf(str), DataFrame)
             _Validators._validate_dict_argument_key_value(arg_name="columns", arg_dict=columns,
-                                                          key_types=(ColumnExpression,
+                                                          key_types=(ColumnExpression,),
                                                           value_types=expected_value_types)
         if margins:
             _Validators._validate_dict_argument_key_value(arg_name="margins", arg_dict=margins,
@@ -12159,7 +12446,7 @@ class DataFrame():
                                "DataFrame specified as value in 'columns' argument "
                                "should have only one column.")
                     raise ValueError(err_)
-                _column_value = [*
+                _column_value = [*(i[0] for i in _v_df.drop_duplicate().get_values())]
             else:
                 # We are allowing users to pass an int, str, float or list of int, float, str.
                 # Convert it to list, if it is not a list.
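The repaired assignment gathers the first field of every deduplicated row into a plain list by unpacking a generator expression; the construct is equivalent to a list comprehension. A runnable sketch with stand-in data (the nested tuples imitate what a one-column get_values() result might look like):

    # Stand-in for _v_df.drop_duplicate().get_values(): one-column rows.
    rows = [("yes",), ("no",), ("yes",)]
    deduplicated = list(dict.fromkeys(rows))  # order-preserving de-duplication
    _column_value = [*(i[0] for i in deduplicated)]
    print(_column_value)  # ['yes', 'no']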
@@ -12238,7 +12525,7 @@ class DataFrame():
         non_participating_columns = [col for col in self.columns if col not in participating_columns]
 
         # Generating WITH clause.
-        with_clause, seperator
+        with_clause, seperator = "", ""
         with_clause_column_names = []
         if margins:
             # margins will be a dict. Key is analytic function name. Value can be a tuple or list of tuple.
@@ -12284,7 +12571,8 @@ class DataFrame():
         sql = "SELECT * FROM {} PIVOT ({for_clause} {with_clause}) {derived_table_clause}".format(
             self._table_name, for_clause=for_clause, with_clause=with_clause, derived_table_clause=tmp_clause)
         return DataFrame.from_query(sql)
-
+
+    @collect_queryband(queryband="DF_unpivot")
     def unpivot(self,
                 columns=None,
                 transpose_column=None,
@@ -12546,7 +12834,7 @@ class DataFrame():
         arg_info_matrix.append(["exclude_nulls", exclude_nulls, True, (bool)])
         arg_info_matrix.append(["returns", returns, True, (str, list), True])
         arg_info_matrix.append(["all_columns", all_columns, True, (bool)])
-        for i in range(1, int(len(kwargs)/2) + 1):
+        for i in range(1, int(len(kwargs) / 2) + 1):
             # Get the values of colN where N is in range(1, half the length of kwargs + 1).
             col = kwargs.get("col{}".format(i), None)
             col_val = kwargs.get("col{}_value".format(i), None)
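The loop runs over half the kwargs count because unpivot expects its keyword arguments in pairs: col1/col1_value, col2/col2_value, and so on. A runnable sketch of that pairing (the keys and values are illustrative):

    kwargs = {"col1": "jan_sales", "col1_value": "Jan",
              "col2": "feb_sales", "col2_value": "Feb"}
    for i in range(1, int(len(kwargs) / 2) + 1):
        col = kwargs.get("col{}".format(i), None)
        col_val = kwargs.get("col{}_value".format(i), None)
        print(col, "->", col_val)
    # jan_sales -> Jan
    # feb_sales -> Feb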
@@ -12703,7 +12991,7 @@ class DataFrame():
                                                             self._nodeid, self._metaexpr)
 
         # Generate the SELECT query.
-        select_query = 'SELECT * FROM {tbl_name} UNPIVOT{excl_null} ({for_cl}) {tmp_cl};'
+        select_query = 'SELECT * FROM {tbl_name} UNPIVOT{excl_null} ({for_cl}) {tmp_cl};'. \
             format(tbl_name=self._table_name,
                    excl_null="" if exclude_nulls else " INCLUDE NULLS",
                    for_cl=for_clause,
@@ -12712,6 +13000,7 @@ class DataFrame():
         # Create the teradataml dataframe from SELECT query and return the same.
         return DataFrame.from_query(select_query)
 
+    @collect_queryband(queryband="DF_plot")
     def plot(self, x, y, scale=None, kind="line", **kwargs):
         """
         DESCRIPTION:
@@ -13075,6 +13364,14 @@ class DataFrame():
                 Applicable only for the wiggle and mesh plots.
                 Types: int OR float
 
+            ignore_nulls:
+                Optional Argument.
+                Specifies whether to delete rows with null values present in the
+                'x', 'y' and 'scale' params.
+                Default Value: False
+                Types: bool
+
+
         RAISES:
             TeradataMlException
 
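A doctest-style sketch of the new flag (the DataFrame and column names are illustrative; a connected Vantage context is assumed):

    >>> # Drop rows whose 'x' or 'y' values are NULL before rendering the line plot.
    >>> plot = df.plot(x=df.period, y=df.sales, kind="line", ignore_nulls=True)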
@@ -13437,6 +13734,7 @@ class DataFrame():
         """
         return _Plot(x=x, y=y, scale=scale, kind=kind, **kwargs)
 
+    @collect_queryband(queryband="DF_itertuples")
     def itertuples(self, name='Row', num_rows=None):
         """
         DESCRIPTION:
@@ -13513,6 +13811,287 @@ class DataFrame():
             for rec in cur:
                 yield rec
 
+    @collect_queryband(queryband="DF_replace")
+    def replace(self, to_replace, value=None, subset=None):
+        """
+        DESCRIPTION:
+            Function replaces every occurrence of "to_replace" with the "value"
+            in the columns mentioned in "subset". When "subset" is not provided,
+            the function replaces in all columns.
+
+        PARAMETERS:
+            to_replace:
+                Required Argument.
+                Specifies a ColumnExpression or a literal that the function
+                searches for in the column. Use a ColumnExpression when
+                you want to match the condition based on a DataFrameColumn
+                function, else use a literal.
+                Note:
+                    Only ColumnExpressions generated from DataFrameColumn
+                    functions are supported. BinaryExpressions are not supported.
+                    Example: Consider a teradataml DataFrame with two columns COL1, COL2.
+                             df.COL1.abs() is supported but df.COL1 == df.COL2 is not
+                             supported.
+                Supported column types: CHAR, VARCHAR, FLOAT, INTEGER, DECIMAL
+                Types: ColumnExpression OR int OR float OR str OR dict
+
+            value:
+                Required argument when "to_replace" is not a dictionary. Optional otherwise.
+                Specifies a ColumnExpression or a literal that replaces
+                "to_replace" in the column. Use a ColumnExpression when
+                you want to replace based on a DataFrameColumn function, else
+                use a literal.
+                Notes:
+                    * Argument is ignored if "to_replace" is a dictionary.
+                    * Only ColumnExpressions generated from DataFrameColumn
+                      functions are supported. BinaryExpressions are not supported.
+                      Example: Consider a teradataml DataFrame with two columns COL1, COL2.
+                               df.COL1.abs() is supported but df.COL1 == df.COL2 is not
+                               supported.
+                Supported column types: CHAR, VARCHAR, FLOAT, INTEGER, DECIMAL
+                Types: ColumnExpression OR int OR float OR str
+
+            subset:
+                Optional Argument.
+                Specifies the column(s) to consider for replacing the values.
+                Types: ColumnExpression OR str OR list
+
+        RAISES:
+            TeradataMlException
+
+        RETURNS:
+            teradataml DataFrame
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "admissions_train")
+
+            # Create a DataFrame on 'admissions_train' table.
+            >>> df = DataFrame("admissions_train")
+            >>> print(df)
+               masters   gpa     stats programming  admitted
+            id
+            15     yes  4.00  Advanced    Advanced         1
+            34     yes  3.85  Advanced    Beginner         0
+            13      no  4.00  Advanced      Novice         1
+            38     yes  2.65  Advanced    Beginner         1
+            5       no  3.44    Novice      Novice         0
+            40     yes  3.95    Novice    Beginner         0
+            7      yes  2.33    Novice      Novice         1
+            22     yes  3.46    Novice    Beginner         0
+            26     yes  3.57  Advanced    Advanced         1
+            17      no  3.83  Advanced    Advanced         1
+
+            # Example 1: Replace the string 'Advanced' with 'Good' in columns 'stats'
+            # and 'programming'.
+            >>> res = df.replace("Advanced", "Good", subset=["stats", "programming"])
+            >>> print(res)
+               masters   gpa   stats programming  admitted
+            id
+            13      no  4.00    Good      Novice         1
+            36      no  3.00    Good      Novice         0
+            15     yes  4.00    Good        Good         1
+            40     yes  3.95  Novice    Beginner         0
+            22     yes  3.46  Novice    Beginner         0
+            38     yes  2.65    Good    Beginner         1
+            26     yes  3.57    Good        Good         1
+            5       no  3.44  Novice      Novice         0
+            7      yes  2.33  Novice      Novice         1
+            19     yes  1.98    Good        Good         0
+
+            # Example 2: Replace the string 'Advanced' with 'Good' and 'Beginner' with 'starter'
+            # in columns 'stats' and 'programming'.
+            >>> res = df.replace({"Advanced": "Good", "Beginner": "starter"}, subset=["stats", "programming"])
+            >>> print(res)
+               masters   gpa   stats programming  admitted
+            id
+            15     yes  4.00    Good        Good         1
+            7      yes  2.33  Novice      Novice         1
+            22     yes  3.46  Novice     starter         0
+            17      no  3.83    Good        Good         1
+            13      no  4.00    Good      Novice         1
+            38     yes  2.65    Good     starter         1
+            26     yes  3.57    Good        Good         1
+            5       no  3.44  Novice      Novice         0
+            34     yes  3.85    Good     starter         0
+            40     yes  3.95  Novice     starter         0
+
+            # Example 3: Append the string '_New' to 'stats' column when values in
+            # 'programming' and 'stats' are same.
+            >>> res = df.replace({df.programming: df.stats+"_New"}, subset=["stats"])
+            >>> print(res)
+               masters   gpa         stats programming  admitted
+            id
+            15     yes  4.00  Advanced_New    Advanced         1
+            34     yes  3.85      Advanced    Beginner         0
+            13      no  4.00      Advanced      Novice         1
+            38     yes  2.65      Advanced    Beginner         1
+            5       no  3.44    Novice_New      Novice         0
+            40     yes  3.95        Novice    Beginner         0
+            7      yes  2.33    Novice_New      Novice         1
+            22     yes  3.46        Novice    Beginner         0
+            26     yes  3.57  Advanced_New    Advanced         1
+            17      no  3.83  Advanced_New    Advanced         1
+
+            # Example 4: Round the values of gpa to the nearest integer.
+            >>> res = df.replace({df.gpa: df.gpa.round(0)}, subset=["gpa"])
+            >>> print(res)
+               masters  gpa     stats programming  admitted
+            id
+            15     yes  4.0  Advanced    Advanced         1
+            7      yes  2.0    Novice      Novice         1
+            22     yes  3.0    Novice    Beginner         0
+            17      no  4.0  Advanced    Advanced         1
+            13      no  4.0  Advanced      Novice         1
+            38     yes  3.0  Advanced    Beginner         1
+            26     yes  4.0  Advanced    Advanced         1
+            5       no  3.0    Novice      Novice         0
+            34     yes  4.0  Advanced    Beginner         0
+            40     yes  4.0    Novice    Beginner         0
+
+            # Example 5: Replace the value of masters with '1' if value is 'yes'
+            # and with '0' if value is 'no'.
+            >>> res = df.replace({'yes': 1, 'no': 0}, subset=["masters"])
+            >>> print(res)
+               masters   gpa     stats programming  admitted
+            id
+            15       1  4.00  Advanced    Advanced         1
+            7        1  2.33    Novice      Novice         1
+            22       1  3.46    Novice    Beginner         0
+            17       0  3.83  Advanced    Advanced         1
+            13       0  4.00  Advanced      Novice         1
+            38       1  2.65  Advanced    Beginner         1
+            26       1  3.57  Advanced    Advanced         1
+            5        0  3.44    Novice      Novice         0
+            34       1  3.85  Advanced    Beginner         0
+            40       1  3.95    Novice    Beginner         0
+        """
+        _validation_matrix = []
+        _validation_matrix.append(["to_replace", to_replace, True, (int, float, str, dict, ColumnExpression)])
+        _validation_matrix.append(["value", value, False, (int, float, str, dict, type(None), ColumnExpression)])
+        _validation_matrix.append(["subset", subset, False, (str, list, type(None))])
+        _Validators._validate_function_arguments(_validation_matrix)
+
+        if subset is None:
+            subset = self.columns
+        else:
+            subset = [col.name if not isinstance(col, str) else col for col in UtilFuncs._as_list(subset)]
+
+        if not isinstance(to_replace, dict):
+            to_replace = {to_replace: value}
+
+        new_columns = {}
+        for column in self.columns:
+            new_columns[column] = self[column].replace(to_replace) if column in subset else self[column]
+        return self.assign(**new_columns, drop_columns=True).select(self.columns)
+
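The implementation funnels every column named in "subset" through the column-level replace() and reassigns the result, leaving the remaining columns untouched. A pure-Python sketch of the per-column semantics of the dict form used in Example 5 (values found in the mapping are replaced; everything else passes through):

    to_replace = {"yes": 1, "no": 0}
    column_values = ["yes", "no", "yes", "maybe"]
    replaced = [to_replace.get(v, v) for v in column_values]
    print(replaced)  # [1, 0, 1, 'maybe']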
+    @collect_queryband(queryband="DF_cube")
+    def cube(self, columns):
+        """
+        DESCRIPTION:
+            cube() function creates a multi-dimensional cube for the DataFrame
+            using the specified column(s), thereby running aggregates on
+            it to produce the aggregations on different dimensions.
+
+
+        PARAMETERS:
+            columns:
+                Required Argument.
+                Specifies the name(s) of input teradataml DataFrame column(s).
+                Types: str OR list of str(s)
+
+        RETURNS:
+            teradataml DataFrameGroupBy
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            # Example 1: Analyze the data by grouping it into masters and stats dimensions.
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df1 = df.cube(["masters", "stats"]).sum()
+            >>> df1
+              masters     stats  sum_id  sum_gpa  sum_admitted
+            0      no  Beginner       8     3.60             1
+            1    None  Advanced     555    84.21            16
+            2    None  Beginner      21    18.31             3
+            3     yes  Beginner      13    14.71             2
+            4    None      None     820   141.67            26
+            5     yes  Advanced     366    49.26             7
+            6      no      None     343    63.96            16
+            7    None    Novice     244    39.15             7
+            8      no  Advanced     189    34.95             9
+            9     yes    Novice      98    13.74             1
+
+        """
+        # Validate columns argument.
+        arg_info_matrix = []
+        arg_info_matrix.append(["columns", columns, False, (str, list), True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(arg_info_matrix)
+
+        # Check that each element in passed columns is a valid column in the DataFrame.
+        _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
+
+        # Query generation of the cube API is the same as group by;
+        # only 'cube' is concatenated with the 'group by' clause.
+        return self.groupby(columns, option="cube")
+
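As the closing comments say, cube() reuses the GROUP BY code path and only injects the CUBE keyword. A sketch of the rough shape of SQL this should produce for the example above; the exact query teradataml generates may differ:

    # Assumed shape of the aggregate query behind df.cube(["masters", "stats"]).sum().
    cube_sql = ("SELECT masters, stats, SUM(id), SUM(gpa), SUM(admitted) "
                "FROM admissions_train "
                "GROUP BY CUBE (masters, stats)")
    print(cube_sql)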
+    @collect_queryband(queryband="DF_rollup")
+    def rollup(self, columns):
+        """
+        DESCRIPTION:
+            rollup() function creates a multi-dimensional rollup for the DataFrame
+            using the specified column(s), thereby running aggregates on
+            it to produce the aggregations on different dimensions.
+
+
+        PARAMETERS:
+            columns:
+                Required Argument.
+                Specifies the name(s) of input teradataml DataFrame column(s).
+                Types: str OR list of str(s)
+
+        RETURNS:
+            teradataml DataFrameGroupBy
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            # Example 1: Analyze the data by grouping it into masters and stats dimensions.
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df1 = df.rollup(["masters", "stats"]).sum()
+            >>> df1
+              masters     stats  sum_id  sum_gpa  sum_admitted
+            0      no      None     343    63.96            16
+            1     yes      None     477    77.71            10
+            2    None      None     820   141.67            26
+            3      no    Novice     146    25.41             6
+            4      no  Beginner       8     3.60             1
+            5     yes    Novice      98    13.74             1
+            6     yes  Beginner      13    14.71             2
+            7     yes  Advanced     366    49.26             7
+            8      no  Advanced     189    34.95             9
+
+        """
+        # Validate columns argument.
+        arg_info_matrix = []
+        arg_info_matrix.append(["columns", columns, False, (str, list), True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(arg_info_matrix)
+
+        # Check that each element in passed columns is a valid column in the DataFrame.
+        _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
+
+        # Query generation of the rollup API is the same as group by;
+        # only 'rollup' is concatenated with the 'group by' clause.
+        return self.groupby(columns, option="rollup")
+
 
 class DataFrameGroupBy(DataFrame):
     """
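rollup() differs from cube() only in the grouping-set shape: CUBE aggregates every combination of the listed columns (2^n grouping sets), while ROLLUP aggregates only the hierarchical prefixes (n+1 sets), which is why the rollup example above has no row for 'stats' alone. A pure-Python sketch enumerating both (no database needed; set display order may vary):

    from itertools import combinations

    cols = ["masters", "stats"]
    cube_sets = [set(c) for r in range(len(cols) + 1) for c in combinations(cols, r)]
    rollup_sets = [set(cols[:i]) for i in range(len(cols) + 1)]
    print(cube_sets)    # [set(), {'masters'}, {'stats'}, {'masters', 'stats'}]
    print(rollup_sets)  # [set(), {'masters'}, {'masters', 'stats'}]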
@@ -13520,7 +14099,8 @@ class DataFrameGroupBy(DataFrame):
     Updates AED node for DataFrame groupby object.
 
     """
-
+
+    def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupbyexpr, column_list, option=None):
         """
         init() method for DataFrameGroupBy.
 
@@ -13555,11 +14135,17 @@ class DataFrameGroupBy(DataFrame):
                 Specifies list of columns provided by user to be part group by clause.
                 Types: str or List of Strings
 
+            option:
+                Optional Argument.
+                Specifies the groupby option.
+                Permitted Values: "CUBE", "ROLLUP", None
+                Types: str or NoneType
+
         RETURNS:
             teradataml DataFrameGroupBy instance
         """
         super(DataFrameGroupBy, self).__init__()
-        self._nodeid = self._aed_utils._aed_groupby(nodeid, groupbyexpr)
+        self._nodeid = self._aed_utils._aed_groupby(nodeid, groupbyexpr, option)
         self._metaexpr = metaexpr
         self._column_names_and_types = column_names_and_types
         self._columns = columns
@@ -13583,7 +14169,7 @@ class DataFrameGroupBy(DataFrame):
             allowed_types = self._get_assign_allowed_types()
         """
         from sqlalchemy.sql.functions import Function
-        return (type(None), int, float, str, decimal.Decimal, Function, ColumnExpression)
+        return (type(None), int, float, str, decimal.Decimal, Function, ColumnExpression, ClauseElement)
 
     def _generate_assign_metaexpr_aed_nodeid(self, drop_columns, **kwargs):
         """
@@ -13675,8 +14261,10 @@ class DataFrameGroupByTime(DataFrame):
     Updates AED node for DataFrame GROUP BY TIME object.
 
     """
-
-
+
+    def __init__(self, nodeid, metaexpr, column_names_and_types, columns, groupby_value_expr, column_list,
+                 timebucket_duration,
+                 value_expression=None, timecode_column=None, sequence_column=None, fill=None):
         """
         init() method for DataFrameGroupByTime.
 
@@ -13762,10 +14350,10 @@ class DataFrameGroupByTime(DataFrame):
         timecode_column = "" if timecode_column is None else UtilFuncs._process_for_teradata_keyword(timecode_column)
         sequence_column = "" if sequence_column is None else UtilFuncs._process_for_teradata_keyword(sequence_column)
 
-        self._nodeid = self._aed_utils._aed_groupby_time(nodeid
-                                                         value_expression
-                                                         using_timecode
-                                                         fill
+        self._nodeid = self._aed_utils._aed_groupby_time(nodeid=nodeid, timebucket_duration=timebucket_duration,
+                                                         value_expression=groupby_value_expr,
+                                                         using_timecode=timecode_column, seqno_col=sequence_column,
+                                                         fill=fill)
 
         # MetaExpression is same as that of parent.
         self._metaexpr = metaexpr
@@ -13788,6 +14376,7 @@ class DataFrameGroupByTime(DataFrame):
         self._sequence_column = sequence_column
         self._fill = fill
 
+    @collect_queryband(queryband="DF_bottom")
     def bottom(self, number_of_values_to_column, with_ties=False):
         """
         DESCRIPTION:
@@ -14059,6 +14648,7 @@ class DataFrameGroupByTime(DataFrame):
 
         return self.__process_time_series_aggregate_with_multi_input_arguments(number_of_values_to_column, operation)
 
+    @collect_queryband(queryband="DF_deltaT")
     def delta_t(self, start_condition, end_condition):
         """
         DESCRIPTION:
@@ -14310,12 +14900,14 @@ class DataFrameGroupByTime(DataFrame):
         operation = "delta_t"
 
         kwargs = {
-            "start_condition": start_condition.compile() if isinstance(start_condition,
+            "start_condition": start_condition.compile() if isinstance(start_condition,
+                                                                       ColumnExpression) else start_condition,
             "end_condition": end_condition.compile() if isinstance(end_condition, ColumnExpression) else end_condition
         }
         return self._get_dataframe_aggregate(operation=operation, **kwargs)
 
-
+    @collect_queryband(queryband="DF_first")
+    def first(self, columns=None):
         """
         DESCRIPTION:
             Returns the oldest value, determined by the timecode, for each group. FIRST is a single-threaded function.
@@ -14507,8 +15099,9 @@ class DataFrameGroupByTime(DataFrame):
         # Checking each element in passed columns to be valid column in dataframe
         _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
 
-        return self._get_dataframe_aggregate(operation
+        return self._get_dataframe_aggregate(operation='first', columns=columns)
 
+    @collect_queryband(queryband="DF_last")
     def last(self, columns=None):
         """
         DESCRIPTION:
@@ -14703,8 +15296,9 @@ class DataFrameGroupByTime(DataFrame):
         # Checking each element in passed columns to be valid column in dataframe
         _Validators._validate_column_exists_in_dataframe(columns, self._metaexpr)
 
-        return self._get_dataframe_aggregate(operation
+        return self._get_dataframe_aggregate(operation='last', columns=columns)
 
+    @collect_queryband(queryband="DF_mad")
     def mad(self, constant_multiplier_columns=None):
         """
         DESCRIPTION:
@@ -14912,6 +15506,7 @@ class DataFrameGroupByTime(DataFrame):
 
         return self.__process_time_series_aggregate_with_multi_input_arguments(constant_multiplier_columns, 'mad')
 
+    @collect_queryband(queryband="DF_mode")
     def mode(self):
         """
         DESCRIPTION:
@@ -15066,6 +15661,7 @@ class DataFrameGroupByTime(DataFrame):
         """
         return self._get_dataframe_aggregate(operation='mode')
 
+    @collect_queryband(queryband="DF_percentile")
     def percentile(self, percentile, distinct=False, interpolation="LINEAR"):
         """
         DESCRIPTION:
@@ -15330,6 +15926,7 @@ class DataFrameGroupByTime(DataFrame):
         return self._get_dataframe_aggregate(operation='percentile', percentile=percentile,
                                              distinct=distinct, interpolation=interpolation)
 
+    @collect_queryband(queryband="DF_top")
     def top(self, number_of_values_to_column, with_ties=False):
         """
         DESCRIPTION:
@@ -15745,7 +16342,8 @@ class DataFrameGroupByTime(DataFrame):
         remaining_columns = list(set(self.columns) - set(columns_processed))
         unsupported_types = _Dtypes._get_unsupported_data_types_for_aggregate_operations(operation)
         for column in remaining_columns:
-            if not isinstance(self._td_column_names_and_sqlalchemy_types[column.lower()],
+            if not isinstance(self._td_column_names_and_sqlalchemy_types[column.lower()],
+                              tuple(unsupported_types)):
                 # We should not involve columns used in value expression of GROUP BY TIME clause as well.
                 if column not in self._value_expression:
                     default_constant_for_columns.append(column)
@@ -15964,9 +16562,9 @@ class _TDUAF(DataFrame):
         self._awu_matrix.append(["id_sequence", self._id_sequence, True, (str, list), True])
         self._awu_matrix.append(["payload_field", self._payload_field, not self._is_payload_required(), (str, list), True])
         self._awu_matrix.append(["payload_content", self._payload_content, not self._is_payload_required(), str, True,
-
-
-
+                                 ["REAL", "COMPLEX", "AMPL_PHASE", "AMPL_PHASE_RADIANS", "AMPL_PHASE_DEGREES",
+                                  "MULTIVAR_REAL", "MULTIVAR_COMPLEX", "MULTIVAR_ANYTYPE", "MULTIVAR_AMPL_PHASE",
+                                  "MULTIVAR_AMPL_PHASE_RADIANS", "MULTIVAR_AMPL_PHASE_DEGREES"]])
         self._awu_matrix.append(["layer", self._layer, True, str, True])
 
         # store the columns to check against the DataFrame.
@@ -16061,7 +16659,7 @@ class _TDUAF(DataFrame):
         # Declare a function to return a generator object. Note that, this should be a function because,
         # generator object exhausts after the first usage. So, if it is a regular variable,
         # _non_parameterised_sql will not have any data to consume from generator object.
-        get_sql_clauses = lambda
+        get_sql_clauses = lambda: ("{}{}".format(" " * 4, c) for c in sql_clauses)
 
         self._parameterised_sql = self._spec_header.format(
             "\n" + ", \n".join(get_sql_clauses()).format(*(["?"]*len(sql_values)))), sql_values
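The restored lambda matters because a generator can be consumed only once; had the generator been bound to a plain variable, the second consumer mentioned in the comment would find it empty. Wrapping it in a zero-argument lambda manufactures a fresh generator on every call. A runnable sketch (the clause strings are illustrative):

    sql_clauses = ["SERIES_ID(buoy_id)", "ROW_AXIS(TIMECODE(ts))"]

    gen = ("{}{}".format(" " * 4, c) for c in sql_clauses)
    first_pass = list(gen)
    print(list(gen))  # [] -- the generator is exhausted after the first pass

    get_sql_clauses = lambda: ("{}{}".format(" " * 4, c) for c in sql_clauses)
    print(list(get_sql_clauses()))  # a fresh generator on every call
    print(list(get_sql_clauses()))  # still works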
@@ -16345,7 +16943,7 @@ class TDSeries(_TDUAF):
 
         self._additional_spec["SERIES_ID ({})"] = ", ".join(UtilFuncs._as_list(self._id))
 
-    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns
+    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
         """
         DESCRIPTION:
             Function to call the _create_validate_dataframe_from_node which will create
@@ -16527,7 +17125,7 @@ class TDAnalyticResult(_TDUAF):
 
         self._spec_header = "ART_SPEC({})"
 
-    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns
+    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
         """
         DESCRIPTION:
             Function to call the _create_validate_dataframe_from_node which will create
@@ -16716,7 +17314,8 @@ class TDMatrix(_TDUAF):
         self._column_index = column_index
         self._column_index_style = column_index_style
 
-        super().__init__(data=data, id=id, row_index=row_index, row_index_style=row_index_style,
+        super().__init__(data=data, id=id, row_index=row_index, row_index_style=row_index_style,
+                         id_sequence=id_sequence,
                          payload_field=payload_field, payload_content=payload_content, layer=layer)
         self._awu_matrix.append(["column_index", self._column_index, False, (str), True])
         self._awu_matrix.append(
@@ -16733,8 +17332,7 @@ class TDMatrix(_TDUAF):
 
         self._additional_spec["MATRIX_ID ({})"] = ", ".join(UtilFuncs._as_list(self._id))
 
-
-    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns = None):
+    def _create_dataframe_from_node(self, nodeid, metaexpr, index_label, undropped_columns=None):
         """
         DESCRIPTION:
             Function to call the _create_validate_dataframe_from_node which will create
@@ -16793,6 +17391,7 @@ class TDGenSeries():
     """
    TDGenSeries class for UAF Functions.
     """
+
     def __init__(self, instances, data_types, start, offset, num_entries):
         """
         Generate a series to be passed to a UAF function rather than using a
@@ -16849,7 +17448,7 @@ class TDGenSeries():
         # Create a TDGenSeries object to be passed as input to UAF functions.
         >>> series = TDGenSeries(instances = {"BuoyID": 3}, data_types = INTEGER(), start=0, offset=1, num_entries=5)
         """
-
+
         self._instances = instances
         self._data_types = data_types
         self._start = start
@@ -16943,4 +17542,4 @@ class TDGenSeries():
         if not self._parameterised_sql:
             self._generate_spec()
 
-        return self._parameterised_sql
+        return self._parameterised_sql