teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +238 -1
- teradataml/__init__.py +13 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/Transformations.py +4 -4
- teradataml/analytics/__init__.py +0 -2
- teradataml/analytics/analytic_function_executor.py +3 -0
- teradataml/analytics/json_parser/utils.py +13 -12
- teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
- teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
- teradataml/analytics/sqle/__init__.py +0 -13
- teradataml/analytics/utils.py +1 -0
- teradataml/analytics/valib.py +3 -0
- teradataml/automl/__init__.py +1628 -0
- teradataml/automl/custom_json_utils.py +1270 -0
- teradataml/automl/data_preparation.py +993 -0
- teradataml/automl/data_transformation.py +727 -0
- teradataml/automl/feature_engineering.py +1648 -0
- teradataml/automl/feature_exploration.py +547 -0
- teradataml/automl/model_evaluation.py +163 -0
- teradataml/automl/model_training.py +887 -0
- teradataml/catalog/__init__.py +0 -2
- teradataml/catalog/byom.py +49 -6
- teradataml/catalog/function_argument_mapper.py +0 -2
- teradataml/catalog/model_cataloging_utils.py +2 -1021
- teradataml/common/aed_utils.py +6 -2
- teradataml/common/constants.py +50 -58
- teradataml/common/deprecations.py +160 -0
- teradataml/common/garbagecollector.py +61 -104
- teradataml/common/messagecodes.py +27 -36
- teradataml/common/messages.py +11 -15
- teradataml/common/utils.py +205 -287
- teradataml/common/wrapper_utils.py +1 -110
- teradataml/context/context.py +150 -78
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
- teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
- teradataml/data/fish.csv +160 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/insurance.csv +1 -1
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
- teradataml/data/load_example_data.py +3 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/openml_example.json +63 -0
- teradataml/data/scripts/deploy_script.py +65 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
- teradataml/data/templates/open_source_ml.json +9 -0
- teradataml/data/teradataml_example.json +73 -1
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/dataframe/copy_to.py +79 -13
- teradataml/dataframe/data_transfer.py +8 -0
- teradataml/dataframe/dataframe.py +910 -311
- teradataml/dataframe/dataframe_utils.py +102 -5
- teradataml/dataframe/fastload.py +11 -3
- teradataml/dataframe/setop.py +15 -2
- teradataml/dataframe/sql.py +3735 -77
- teradataml/dataframe/sql_function_parameters.py +56 -5
- teradataml/dataframe/vantage_function_types.py +45 -1
- teradataml/dataframe/window.py +30 -29
- teradataml/dbutils/dbutils.py +18 -1
- teradataml/geospatial/geodataframe.py +18 -7
- teradataml/geospatial/geodataframecolumn.py +5 -0
- teradataml/hyperparameter_tuner/optimizer.py +910 -120
- teradataml/hyperparameter_tuner/utils.py +131 -37
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/sklearn/__init__.py +1 -0
- teradataml/opensource/sklearn/_class.py +255 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
- teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
- teradataml/opensource/sklearn/constants.py +54 -0
- teradataml/options/__init__.py +3 -6
- teradataml/options/configure.py +21 -20
- teradataml/scriptmgmt/UserEnv.py +61 -5
- teradataml/scriptmgmt/lls_utils.py +135 -53
- teradataml/table_operators/Apply.py +38 -6
- teradataml/table_operators/Script.py +45 -308
- teradataml/table_operators/TableOperator.py +182 -591
- teradataml/table_operators/__init__.py +0 -1
- teradataml/table_operators/table_operator_util.py +32 -40
- teradataml/utils/validators.py +127 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
- teradataml/analytics/mle/AdaBoost.py +0 -651
- teradataml/analytics/mle/AdaBoostPredict.py +0 -564
- teradataml/analytics/mle/Antiselect.py +0 -342
- teradataml/analytics/mle/Arima.py +0 -641
- teradataml/analytics/mle/ArimaPredict.py +0 -477
- teradataml/analytics/mle/Attribution.py +0 -1070
- teradataml/analytics/mle/Betweenness.py +0 -658
- teradataml/analytics/mle/Burst.py +0 -711
- teradataml/analytics/mle/CCM.py +0 -600
- teradataml/analytics/mle/CCMPrepare.py +0 -324
- teradataml/analytics/mle/CFilter.py +0 -460
- teradataml/analytics/mle/ChangePointDetection.py +0 -572
- teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
- teradataml/analytics/mle/Closeness.py +0 -737
- teradataml/analytics/mle/ConfusionMatrix.py +0 -420
- teradataml/analytics/mle/Correlation.py +0 -477
- teradataml/analytics/mle/Correlation2.py +0 -573
- teradataml/analytics/mle/CoxHazardRatio.py +0 -679
- teradataml/analytics/mle/CoxPH.py +0 -556
- teradataml/analytics/mle/CoxSurvival.py +0 -478
- teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
- teradataml/analytics/mle/DTW.py +0 -623
- teradataml/analytics/mle/DWT.py +0 -564
- teradataml/analytics/mle/DWT2D.py +0 -599
- teradataml/analytics/mle/DecisionForest.py +0 -716
- teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
- teradataml/analytics/mle/DecisionForestPredict.py +0 -561
- teradataml/analytics/mle/DecisionTree.py +0 -830
- teradataml/analytics/mle/DecisionTreePredict.py +0 -528
- teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
- teradataml/analytics/mle/FMeasure.py +0 -402
- teradataml/analytics/mle/FPGrowth.py +0 -734
- teradataml/analytics/mle/FrequentPaths.py +0 -695
- teradataml/analytics/mle/GLM.py +0 -558
- teradataml/analytics/mle/GLML1L2.py +0 -547
- teradataml/analytics/mle/GLML1L2Predict.py +0 -519
- teradataml/analytics/mle/GLMPredict.py +0 -529
- teradataml/analytics/mle/HMMDecoder.py +0 -945
- teradataml/analytics/mle/HMMEvaluator.py +0 -901
- teradataml/analytics/mle/HMMSupervised.py +0 -521
- teradataml/analytics/mle/HMMUnsupervised.py +0 -572
- teradataml/analytics/mle/Histogram.py +0 -561
- teradataml/analytics/mle/IDWT.py +0 -476
- teradataml/analytics/mle/IDWT2D.py +0 -493
- teradataml/analytics/mle/IdentityMatch.py +0 -763
- teradataml/analytics/mle/Interpolator.py +0 -918
- teradataml/analytics/mle/KMeans.py +0 -485
- teradataml/analytics/mle/KNN.py +0 -627
- teradataml/analytics/mle/KNNRecommender.py +0 -488
- teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
- teradataml/analytics/mle/LAR.py +0 -439
- teradataml/analytics/mle/LARPredict.py +0 -478
- teradataml/analytics/mle/LDA.py +0 -548
- teradataml/analytics/mle/LDAInference.py +0 -492
- teradataml/analytics/mle/LDATopicSummary.py +0 -464
- teradataml/analytics/mle/LevenshteinDistance.py +0 -450
- teradataml/analytics/mle/LinReg.py +0 -433
- teradataml/analytics/mle/LinRegPredict.py +0 -438
- teradataml/analytics/mle/MinHash.py +0 -544
- teradataml/analytics/mle/Modularity.py +0 -587
- teradataml/analytics/mle/NEREvaluator.py +0 -410
- teradataml/analytics/mle/NERExtractor.py +0 -595
- teradataml/analytics/mle/NERTrainer.py +0 -458
- teradataml/analytics/mle/NGrams.py +0 -570
- teradataml/analytics/mle/NPath.py +0 -634
- teradataml/analytics/mle/NTree.py +0 -549
- teradataml/analytics/mle/NaiveBayes.py +0 -462
- teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
- teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
- teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
- teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
- teradataml/analytics/mle/NamedEntityFinder.py +0 -529
- teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
- teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
- teradataml/analytics/mle/POSTagger.py +0 -417
- teradataml/analytics/mle/Pack.py +0 -411
- teradataml/analytics/mle/PageRank.py +0 -535
- teradataml/analytics/mle/PathAnalyzer.py +0 -426
- teradataml/analytics/mle/PathGenerator.py +0 -367
- teradataml/analytics/mle/PathStart.py +0 -464
- teradataml/analytics/mle/PathSummarizer.py +0 -470
- teradataml/analytics/mle/Pivot.py +0 -471
- teradataml/analytics/mle/ROC.py +0 -425
- teradataml/analytics/mle/RandomSample.py +0 -637
- teradataml/analytics/mle/RandomWalkSample.py +0 -490
- teradataml/analytics/mle/SAX.py +0 -779
- teradataml/analytics/mle/SVMDense.py +0 -677
- teradataml/analytics/mle/SVMDensePredict.py +0 -536
- teradataml/analytics/mle/SVMDenseSummary.py +0 -437
- teradataml/analytics/mle/SVMSparse.py +0 -557
- teradataml/analytics/mle/SVMSparsePredict.py +0 -553
- teradataml/analytics/mle/SVMSparseSummary.py +0 -435
- teradataml/analytics/mle/Sampling.py +0 -549
- teradataml/analytics/mle/Scale.py +0 -565
- teradataml/analytics/mle/ScaleByPartition.py +0 -496
- teradataml/analytics/mle/ScaleMap.py +0 -378
- teradataml/analytics/mle/ScaleSummary.py +0 -320
- teradataml/analytics/mle/SentenceExtractor.py +0 -363
- teradataml/analytics/mle/SentimentEvaluator.py +0 -432
- teradataml/analytics/mle/SentimentExtractor.py +0 -578
- teradataml/analytics/mle/SentimentTrainer.py +0 -405
- teradataml/analytics/mle/SeriesSplitter.py +0 -641
- teradataml/analytics/mle/Sessionize.py +0 -475
- teradataml/analytics/mle/SimpleMovAvg.py +0 -397
- teradataml/analytics/mle/StringSimilarity.py +0 -425
- teradataml/analytics/mle/TF.py +0 -389
- teradataml/analytics/mle/TFIDF.py +0 -504
- teradataml/analytics/mle/TextChunker.py +0 -414
- teradataml/analytics/mle/TextClassifier.py +0 -399
- teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
- teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
- teradataml/analytics/mle/TextMorph.py +0 -494
- teradataml/analytics/mle/TextParser.py +0 -623
- teradataml/analytics/mle/TextTagger.py +0 -530
- teradataml/analytics/mle/TextTokenizer.py +0 -502
- teradataml/analytics/mle/UnivariateStatistics.py +0 -488
- teradataml/analytics/mle/Unpack.py +0 -526
- teradataml/analytics/mle/Unpivot.py +0 -438
- teradataml/analytics/mle/VarMax.py +0 -776
- teradataml/analytics/mle/VectorDistance.py +0 -762
- teradataml/analytics/mle/WeightedMovAvg.py +0 -400
- teradataml/analytics/mle/XGBoost.py +0 -842
- teradataml/analytics/mle/XGBoostPredict.py +0 -627
- teradataml/analytics/mle/__init__.py +0 -123
- teradataml/analytics/mle/json/adaboost_mle.json +0 -135
- teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
- teradataml/analytics/mle/json/antiselect_mle.json +0 -34
- teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
- teradataml/analytics/mle/json/arima_mle.json +0 -172
- teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
- teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
- teradataml/analytics/mle/json/betweenness_mle.json +0 -97
- teradataml/analytics/mle/json/burst_mle.json +0 -140
- teradataml/analytics/mle/json/ccm_mle.json +0 -124
- teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
- teradataml/analytics/mle/json/cfilter_mle.json +0 -93
- teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
- teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
- teradataml/analytics/mle/json/closeness_mle.json +0 -104
- teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
- teradataml/analytics/mle/json/correlation_mle.json +0 -86
- teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
- teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
- teradataml/analytics/mle/json/coxph_mle.json +0 -98
- teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
- teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
- teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
- teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
- teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
- teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
- teradataml/analytics/mle/json/dtw_mle.json +0 -97
- teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
- teradataml/analytics/mle/json/dwt_mle.json +0 -101
- teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
- teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
- teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
- teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
- teradataml/analytics/mle/json/glm_mle.json +0 -111
- teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
- teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
- teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/histogram_mle.json +0 -100
- teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
- teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
- teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
- teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
- teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
- teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
- teradataml/analytics/mle/json/idwt_mle.json +0 -66
- teradataml/analytics/mle/json/interpolator_mle.json +0 -151
- teradataml/analytics/mle/json/kmeans_mle.json +0 -97
- teradataml/analytics/mle/json/knn_mle.json +0 -141
- teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
- teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
- teradataml/analytics/mle/json/lar_mle.json +0 -78
- teradataml/analytics/mle/json/larpredict_mle.json +0 -69
- teradataml/analytics/mle/json/lda_mle.json +0 -130
- teradataml/analytics/mle/json/ldainference_mle.json +0 -78
- teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
- teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
- teradataml/analytics/mle/json/linreg_mle.json +0 -42
- teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
- teradataml/analytics/mle/json/minhash_mle.json +0 -113
- teradataml/analytics/mle/json/modularity_mle.json +0 -91
- teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
- teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
- teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
- teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
- teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
- teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
- teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
- teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
- teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
- teradataml/analytics/mle/json/ngrams_mle.json +0 -137
- teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
- teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
- teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
- teradataml/analytics/mle/json/pack_mle.json +0 -58
- teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
- teradataml/analytics/mle/json/pagerank_mle.json +0 -81
- teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
- teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
- teradataml/analytics/mle/json/pathstart_mle.json +0 -62
- teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
- teradataml/analytics/mle/json/pivoting_mle.json +0 -71
- teradataml/analytics/mle/json/postagger_mle.json +0 -51
- teradataml/analytics/mle/json/randomsample_mle.json +0 -131
- teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
- teradataml/analytics/mle/json/roc_mle.json +0 -73
- teradataml/analytics/mle/json/sampling_mle.json +0 -75
- teradataml/analytics/mle/json/sax_mle.json +0 -154
- teradataml/analytics/mle/json/scale_mle.json +0 -93
- teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
- teradataml/analytics/mle/json/scalemap_mle.json +0 -44
- teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
- teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
- teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
- teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
- teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
- teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
- teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
- teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
- teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
- teradataml/analytics/mle/json/svmdense_mle.json +0 -165
- teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
- teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
- teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
- teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
- teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
- teradataml/analytics/mle/json/textchunker_mle.json +0 -40
- teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
- teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
- teradataml/analytics/mle/json/textmorph_mle.json +0 -63
- teradataml/analytics/mle/json/textparser_mle.json +0 -166
- teradataml/analytics/mle/json/texttagger_mle.json +0 -81
- teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
- teradataml/analytics/mle/json/tf_mle.json +0 -33
- teradataml/analytics/mle/json/tfidf_mle.json +0 -34
- teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
- teradataml/analytics/mle/json/unpack_mle.json +0 -91
- teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
- teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
- teradataml/analytics/mle/json/varmax_mle.json +0 -176
- teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
- teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
- teradataml/analytics/mle/json/xgboost_mle.json +0 -178
- teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
- teradataml/analytics/sqle/Antiselect.py +0 -321
- teradataml/analytics/sqle/Attribution.py +0 -603
- teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
- teradataml/analytics/sqle/GLMPredict.py +0 -430
- teradataml/analytics/sqle/MovingAverage.py +0 -543
- teradataml/analytics/sqle/NGramSplitter.py +0 -548
- teradataml/analytics/sqle/NPath.py +0 -632
- teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
- teradataml/analytics/sqle/Pack.py +0 -388
- teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
- teradataml/analytics/sqle/Sessionize.py +0 -390
- teradataml/analytics/sqle/StringSimilarity.py +0 -400
- teradataml/analytics/sqle/Unpack.py +0 -503
- teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
- teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
- teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
- teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
- teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
- teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
- teradataml/analytics/sqle/json/npath_sqle.json +0 -67
- teradataml/analytics/sqle/json/pack_sqle.json +0 -47
- teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
- teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
- teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
- teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
- teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
- teradataml/catalog/model_cataloging.py +0 -980
- teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
- teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
- teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
- teradataml/table_operators/sandbox_container_util.py +0 -643
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
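The headline additions in this release are the new teradataml/automl subpackage, the opensource/sklearn wrapper layer, and a much larger hyperparameter_tuner. For orientation, the sketch below shows how the AutoML workflow built from these modules is typically driven; it is a minimal, hypothetical example — the class name, constructor arguments, target-column name and methods are assumptions inferred from the module layout and docstrings in this diff, not an API reference.

# Hypothetical usage sketch of the new AutoML entry point (names and signatures are assumptions).
from teradataml import create_context, DataFrame, AutoML

create_context(host="<host>", username="<user>", password="<password>")

train = DataFrame("bank_churn_train")   # assumed table, e.g. loaded from the new bank_churn.csv example data
test = DataFrame("bank_churn_test")     # assumed hold-out table

aml = AutoML(task_type="Classification", verbose=1)   # assumed constructor arguments
aml.fit(train, "churn")                               # assumed fit(data, target) signature and target name
aml.leaderboard()                                     # assumed: ranked summary of trained models
predictions = aml.predict(test)                       # assumed predict signature

One of the new automl modules, teradataml/automl/feature_exploration.py (+547 lines), is reproduced below; it implements the feature-exploration phase of that pipeline.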
teradataml/automl/feature_exploration.py  (new file)
@@ -0,0 +1,547 @@
# ##################################################################
#
# Copyright 2024 Teradata. All rights reserved.
# TERADATA CONFIDENTIAL AND TRADE SECRET
#
# Primary Owner: Sweta Shaw
# Email Id: Sweta.Shaw@Teradata.com
#
# Secondary Owner: Akhil Bisht
# Email Id: AKHIL.BISHT@Teradata.com
#
# Version: 1.1
# Function Version: 1.0
# ##################################################################


# Teradata libraries
from teradataml.dataframe.dataframe import DataFrame
from teradataml.dataframe.copy_to import copy_to_sql
from teradataml import ColumnSummary, CategoricalSummary, GetFutileColumns
from teradataml import OutlierFilterFit, OutlierFilterTransform
from teradataml.hyperparameter_tuner.utils import _ProgressBar
from teradataml.common.messages import Messages, MessageCodes

def _is_terminal():
    """
    DESCRIPTION:
        Common function that detects whether code is running in a
        terminal/console or an IPython-supported environment.

    RETURNS:
        bool.
    """
    if not hasattr(_is_terminal, 'ipython_imported'):
        try:
            # Check IPython environment
            __IPYTHON__
            # Check if IPython library is installed
            from IPython.display import display, HTML
            _is_terminal.ipython_imported = True
        except (NameError, ImportError):
            # If error, then terminal
            _is_terminal.ipython_imported = False

    return not _is_terminal.ipython_imported

# Conditional import
if not _is_terminal():
    from IPython.display import display, HTML

class _FeatureExplore:

    def __init__(self,
                 data=None,
                 target_column=None,
                 verbose=0):
        """
        DESCRIPTION:
            Internal function initializes the data and target column for feature exploration.

        PARAMETERS:
            data:
                Required Argument.
                Specifies the input teradataml DataFrame for feature exploration.
                Types: teradataml DataFrame

            target_column:
                Required Argument.
                Specifies the name of the target column in "data".
                Types: str

            verbose:
                Optional Argument.
                Specifies the detailed execution steps based on verbose level.
                Default Value: 0
                Permitted Values:
                    * 0: prints the progress bar and leaderboard.
                    * 1: prints the execution steps of AutoML.
                    * 2: prints the intermediate data between the execution of each step of AutoML.
                Types: int
        """
        self.data = data
        self.target_column = target_column
        self.verbose = verbose
        self.terminal_print = _is_terminal()
        self.style = self._common_style()

    def _exploration(self):
        """
        DESCRIPTION:
            Internal function performs the following operations:
                1. Column summary of columns of the dataset
                2. Statistics of numeric columns of the dataset
                3. Categorical column summary
                4. Futile columns in the dataset
                5. Target column distribution
                6. Outlier percentage in numeric columns of the dataset
        """
        numerical_columns = []
        categorical_columns = []
        date_column_list = []

        self._display_heading(phase=0)
        self._display_msg(msg='Feature Exploration started ...')

        # Detecting numerical and categorical columns
        for col, d_type in self.data._column_names_and_types:
            if d_type in ['int', 'float']:
                numerical_columns.append(col)
            elif d_type in ['str']:
                categorical_columns.append(col)
            elif d_type in ['datetime.date', 'datetime.datetime']:
                date_column_list.append(col)

        # Display initial count of data
        self._display_msg(msg='\nData Overview:', show_data=True)
        print(f"Total Rows in the data: {self.data.shape[0]}\n"\
              f"Total Columns in the data: {self.data.shape[1]}")

        # Displaying date columns
        if len(date_column_list) != 0:
            self._display_msg(msg='Identified Date Columns:',
                              data=date_column_list)

        # Column summary of each feature of data
        # such as null count, datatype, non-null count
        self._column_summary()

        # Displays statistics such as mean/median/mode
        self._statistics()

        # Categorical summary and futile column detection
        if len(categorical_columns) != 0:
            categorical_obj = self._categorical_summary(categorical_columns)
            self._futile_column(categorical_obj)

        # Plot a graph of target column
        self._target_column_details()

        # Displays outlier percentage
        outlier_method = "Tukey"
        df = self._outlier_detection(outlier_method, numerical_columns)

    def _statistics(self):
        """
        DESCRIPTION:
            Internal function displays the statistics of numeric columns such as mean, mode, median.
        """
        # Statistics of numerical columns
        self._display_msg(msg='\nStatistics of Data:',
                          data=self.data.describe(),
                          show_data=True)

    def _column_summary(self):
        """
        DESCRIPTION:
            Internal function displays the column summary of each column, such as
            datatype, null count, non-null count, zero count.
        """
        # Column summary of all columns of dataset
        obj = ColumnSummary(data=self.data,
                            target_columns=self.data.columns,
                            volatile=True)
        self._display_msg(msg='\nColumn Summary:',
                          data=obj.result,
                          show_data=True)

    def _categorical_summary(self,
                             categorical_columns=None):
        """
        DESCRIPTION:
            Internal function displays the categorical summary of categorical columns, such as count and distinct values.

        PARAMETERS:
            categorical_columns:
                Required Argument.
                Specifies the categorical columns.
                Types: str or list of strings (str)

        RETURNS:
            Instance of CategoricalSummary.
        """
        self._display_msg(msg='\nCategorical Columns with their Distinct values:',
                          show_data=True)

        # Categorical summary of categorical columns
        obj = CategoricalSummary(data=self.data,
                                 target_columns=categorical_columns)

        catg_obj = obj.result[obj.result['DistinctValue'] != None]
        print("{:<25} {:<10}".format("ColumnName", "DistinctValueCount"))
        for col in categorical_columns:
            dst_val = catg_obj[catg_obj['ColumnName'] == col].size // 3
            print("{:<25} {:<10}".format(col, dst_val))

        return obj

    def _futile_column(self,
                       categorical_obj):
        """
        DESCRIPTION:
            Internal function detects the futile columns.

        PARAMETERS:
            categorical_obj:
                Required Argument.
                Specifies the instance of CategoricalSummary for futile column detection.
                Types: Instance of CategoricalSummary
        """
        # Futile columns detection using categorical column object
        gfc_out = GetFutileColumns(data=self.data,
                                   object=categorical_obj,
                                   category_summary_column="ColumnName",
                                   threshold_value=0.7)

        # Extracts the futile column present in the first column
        f_cols = [i[0] for i in gfc_out.result.itertuples()]

        if len(f_cols) == 0:
            self._display_msg(inline_msg='\nNo Futile columns found.',
                              show_data=True)
        else:
            self._display_msg(msg='\nFutile columns in dataset:',
                              data=gfc_out.result,
                              show_data=True)

    def _target_column_details(self,
                               plot_data=None):
        """
        DESCRIPTION:
            Internal function displays the distribution of the target column/response column.

        PARAMETERS:
            plot_data:
                Optional Argument.
                Specifies the input teradataml DataFrame for plotting distribution.
                Types: teradataml DataFrame
        """
        if self._check_visualization_libraries() and not _is_terminal():
            import matplotlib.pyplot as plt
            import seaborn as sns
            if plot_data is None:
                target_data = self.data.select([self.target_column]).to_pandas()
            else:
                target_data = plot_data[[self.target_column]]
            self._display_msg(msg='\nTarget Column Distribution:',
                              show_data=True)
            plt.figure(figsize=(8, 6))
            # Plotting a histogram for target column
            plt.hist(target_data, bins=10, density=True, edgecolor='black')
            plt.xlabel(self.target_column)
            plt.ylabel('Density')
            plt.show()

    def _check_visualization_libraries(self):
        """
        DESCRIPTION:
            Internal function checks the availability of data visualization libraries.

        RETURNS:
            Boolean, True if data visualization libraries are available. Otherwise returns False.
        """
        # Conditional import
        try:
            import matplotlib.pyplot as plt
            import seaborn as sns
        except ImportError:
            print("Install seaborn and matplotlib libraries to visualize the data.")
            return False

        return True

    def _outlier_detection(self,
                           outlier_method,
                           column_list,
                           lower_percentile=None,
                           upper_percentile=None):
        """
        DESCRIPTION:
            Function detects the outliers in numerical columns and displays their percentage.

        PARAMETERS:
            outlier_method:
                Required Argument.
                Specifies the outlier method required for outlier detection.
                Types: str

            column_list:
                Required Argument.
                Specifies the numeric columns for outlier percentage calculation.
                Types: str or list of strings (str)

            lower_percentile:
                Optional Argument.
                Specifies the lower percentile value for outlier detection in case of percentile method.
                Types: float

            upper_percentile:
                Optional Argument.
                Specifies the upper percentile value for outlier detection in case of percentile method.
                Types: float

        RETURNS:
            teradataml DataFrame containing column names with their outlier percentage.
        """
        # Performing outlier fit on the data for replacing outliers with NULL value
        fit_params = {
            "data": self.data,
            "target_columns": column_list,
            "outlier_method": outlier_method,
            "lower_percentile": lower_percentile,
            "upper_percentile": upper_percentile,
            "replacement_value": 'NULL'
        }
        OutlierFilterFit_out = OutlierFilterFit(**fit_params)
        transform_params = {
            "data": self.data,
            "object": OutlierFilterFit_out.result
        }
        # Performing outlier transformation on each column
        OutlierTransform_obj = OutlierFilterTransform(**transform_params)

        # Column summary of each column of the data
        fit_params = {
            "data": OutlierTransform_obj.result,
            "target_columns": column_list
        }
        colSummary = ColumnSummary(**fit_params)

        null_count_expr = colSummary.result.NullCount
        non_null_count_expr = colSummary.result.NonNullCount

        # Calculating outlier percentage
        df = colSummary.result.assign(True,
                                      ColumnName=colSummary.result.ColumnName,
                                      OutlierPercentage=(null_count_expr/(non_null_count_expr+null_count_expr))*100)

        # Displaying columns with non-zero outlier percentage
        df = df[df['OutlierPercentage'] > 0]
        if self.verbose > 0:
            print(" "*500, end='\r')
            if df.shape[0] > 0:
                self._display_msg(msg='Columns with outlier percentage :-',
                                  show_data=True)
                print(df)
            else:
                print("\nNo outlier found!")

        return df

    def _common_style(self):
        """
        DESCRIPTION:
            Internal function sets the style tag for HTML.

        RETURNS:
            String containing the style tag.
        """
        style = '''
        <style>
            .custom-div {
                background-color: lightgray;
                color: #000000;
                padding: 10px;
                border-radius: 8px;
                box-shadow: 0 3px 4px rgba(0, 0, 0, 0.2);
                margin-bottom: 10px;
                text-align: center;
            }
        </style>
        '''
        return style

    def _display_heading(self,
                         phase=0,
                         progress_bar=None):
        """
        DESCRIPTION:
            Internal function to print the phases of AutoML that have
            completed in green color.

        PARAMETERS:
            phase:
                Optional Argument.
                Specifies the phase of AutoML that completed.
                Types: int

            progress_bar:
                Optional Argument.
                Specifies the _ProgressBar object.
                Types: object (_ProgressBar)

        RETURNS:
            None.
        """
        # Phases of AutoML
        steps = ["1. Feature Exploration ->", " 2. Feature Engineering ->",
                 " 3. Data Preparation ->", " 4. Model Training & Evaluation"]
        # Check verbose > 0
        if self.verbose > 0:

            # Check if code is running in IPython environment
            if not self.terminal_print:
                # Highlighted phases of AutoML
                highlighted_steps = "".join(steps[:phase])

                # Unhighlighted phases of AutoML
                unhighlighted_steps = "".join(steps[phase:])

                # Combining highlighted and unhighlighted phases
                msg = self.style + f'<br><div class="custom-div"><h3><span style="color: green;">{highlighted_steps}</span>{unhighlighted_steps}<center></h3></center></div>'
                # Displaying the msg
                if progress_bar is not None:
                    progress_bar.update(msg=msg,
                                        progress=False,
                                        ipython=True)
                else:
                    display(HTML(msg))
            else:
                try:
                    # Try to import colorama if not already imported
                    from colorama import Fore, Style, init
                    # Initialize the color package
                    init()

                    # Highlight the phases of AutoML
                    highlighted_steps = "".join([Fore.GREEN + Style.BRIGHT + step + Style.RESET_ALL for step in steps[:phase]])

                    # Unhighlighted phases of AutoML
                    unhighlighted_steps = "".join(steps[phase:])

                    # Combining highlighted and unhighlighted phases
                    msg = f'{highlighted_steps}{unhighlighted_steps}'

                except ImportError:
                    msg = "".join(step for step in steps)

                if progress_bar is not None:
                    progress_bar.update(msg=msg,
                                        progress=False)
                else:
                    print(msg)

    def _display_msg(self,
                     msg=None,
                     progress_bar=None,
                     inline_msg=None,
                     data=None,
                     col_lst=None,
                     show_data=False):
        """
        DESCRIPTION:
            Internal function to print statements according to the
            environment.

        PARAMETERS:
            msg:
                Optional Argument.
                Specifies the message to print.
                Types: str

            progress_bar:
                Optional Argument.
                Specifies the _ProgressBar object.
                Types: object (_ProgressBar)

            inline_msg:
                Optional Argument.
                Specifies the additional information to print.
                Types: str

            data:
                Optional Argument.
                Specifies the teradataml DataFrame to print.
                Types: teradataml DataFrame

            col_lst:
                Optional Argument.
                Specifies the list of columns.
                Types: list of str/int/datetime

            show_data:
                Optional Argument.
                Specifies whether to print msg/data when verbose < 2.
                Default Value: False
                Types: bool

        RETURNS:
            None.
        """
        # If verbose level is set to 2
        if self.verbose == 2:
            # If a progress bar is provided
            if progress_bar:
                # If a message is provided
                if msg:
                    # Update the progress bar with the message and either the column list or data (if they are not None)
                    progress_bar.update(msg=msg, data=col_lst if col_lst else data if data is not None else None,
                                        progress=False,
                                        ipython=not self.terminal_print)
                # If an inline message is provided instead
                elif inline_msg:
                    # Update the progress bar with the inline message
                    progress_bar.update(msg=inline_msg, progress=False)
            # If no progress bar is provided
            else:
                # If a message is provided
                if msg:
                    # Print the message
                    print(f"{msg}")
                    # If a column list is provided
                    if col_lst:
                        # Print the column list
                        print(col_lst)
                    # If data is provided instead
                    elif data is not None:
                        # Print the data if terminal_print is True, else display the data
                        print(data) if self.terminal_print else display(data)
                # If an inline message is provided instead
                elif inline_msg:
                    # Print the inline message
                    print(f'{inline_msg}')
            # Exit the function after handling verbose level 2
            return

        # If verbose level is more than 0 and show_data is True
        if self.verbose > 0 and show_data:
            # If a progress bar and a message are provided
            if progress_bar and msg:
                # Update the progress bar with the message and data (if data is not None)
                progress_bar.update(msg=msg, data=data if data is not None else None,
                                    progress=False, ipython=not self.terminal_print)
            # If no progress bar is provided
            else:
                # If a message is provided
                if msg:
                    # Print the message if terminal_print is True, else display the message
                    print(f'{msg}') if self.terminal_print else display(HTML(f'<h4>{msg}</h4>'))
                # If data is provided
                if data is not None:
                    # Print the data if terminal_print is True, else display the data
                    print(data) if self.terminal_print else display(data)
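The outlier reporting at the end of _outlier_detection above works indirectly: OutlierFilterFit/OutlierFilterTransform replace each outlier with NULL, so after the transform a column's NullCount divided by its total row count (NullCount + NonNullCount) is the outlier fraction. The following is a minimal local sketch of the same arithmetic using pandas only — an illustration, not the in-database code path — and it assumes the "Tukey" method corresponds to the usual Q1 - 1.5*IQR / Q3 + 1.5*IQR fences.

# Local illustration of the outlier-percentage arithmetic used by _outlier_detection.
# Assumption: "Tukey" fences are Q1 - 1.5*IQR and Q3 + 1.5*IQR.
import pandas as pd

df = pd.DataFrame({"age": [23, 25, 27, 29, 31, 90],                 # 90 is an outlier
                   "balance": [100.0, 120.0, 110.0, 130.0, 125.0, 115.0]})

rows = []
for col in df.columns:
    q1, q3 = df[col].quantile([0.25, 0.75])
    iqr = q3 - q1
    lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
    masked = df[col].where(df[col].between(lower, upper))           # outliers become NaN ("NULL")
    null_count = masked.isna().sum()
    non_null_count = masked.notna().sum()
    rows.append({"ColumnName": col,
                 "OutlierPercentage": null_count / (null_count + non_null_count) * 100})

summary = pd.DataFrame(rows)
print(summary[summary["OutlierPercentage"] > 0])                    # mirrors df[df['OutlierPercentage'] > 0]

Note that a column's pre-existing NULLs are also counted by this ratio, so the method yields a quick percentage report rather than an exact outlier count.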