teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +238 -1
- teradataml/__init__.py +13 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/Transformations.py +4 -4
- teradataml/analytics/__init__.py +0 -2
- teradataml/analytics/analytic_function_executor.py +3 -0
- teradataml/analytics/json_parser/utils.py +13 -12
- teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
- teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
- teradataml/analytics/sqle/__init__.py +0 -13
- teradataml/analytics/utils.py +1 -0
- teradataml/analytics/valib.py +3 -0
- teradataml/automl/__init__.py +1628 -0
- teradataml/automl/custom_json_utils.py +1270 -0
- teradataml/automl/data_preparation.py +993 -0
- teradataml/automl/data_transformation.py +727 -0
- teradataml/automl/feature_engineering.py +1648 -0
- teradataml/automl/feature_exploration.py +547 -0
- teradataml/automl/model_evaluation.py +163 -0
- teradataml/automl/model_training.py +887 -0
- teradataml/catalog/__init__.py +0 -2
- teradataml/catalog/byom.py +49 -6
- teradataml/catalog/function_argument_mapper.py +0 -2
- teradataml/catalog/model_cataloging_utils.py +2 -1021
- teradataml/common/aed_utils.py +6 -2
- teradataml/common/constants.py +50 -58
- teradataml/common/deprecations.py +160 -0
- teradataml/common/garbagecollector.py +61 -104
- teradataml/common/messagecodes.py +27 -36
- teradataml/common/messages.py +11 -15
- teradataml/common/utils.py +205 -287
- teradataml/common/wrapper_utils.py +1 -110
- teradataml/context/context.py +150 -78
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
- teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
- teradataml/data/fish.csv +160 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/insurance.csv +1 -1
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
- teradataml/data/load_example_data.py +3 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/openml_example.json +63 -0
- teradataml/data/scripts/deploy_script.py +65 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
- teradataml/data/templates/open_source_ml.json +9 -0
- teradataml/data/teradataml_example.json +73 -1
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/dataframe/copy_to.py +79 -13
- teradataml/dataframe/data_transfer.py +8 -0
- teradataml/dataframe/dataframe.py +910 -311
- teradataml/dataframe/dataframe_utils.py +102 -5
- teradataml/dataframe/fastload.py +11 -3
- teradataml/dataframe/setop.py +15 -2
- teradataml/dataframe/sql.py +3735 -77
- teradataml/dataframe/sql_function_parameters.py +56 -5
- teradataml/dataframe/vantage_function_types.py +45 -1
- teradataml/dataframe/window.py +30 -29
- teradataml/dbutils/dbutils.py +18 -1
- teradataml/geospatial/geodataframe.py +18 -7
- teradataml/geospatial/geodataframecolumn.py +5 -0
- teradataml/hyperparameter_tuner/optimizer.py +910 -120
- teradataml/hyperparameter_tuner/utils.py +131 -37
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/sklearn/__init__.py +1 -0
- teradataml/opensource/sklearn/_class.py +255 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
- teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
- teradataml/opensource/sklearn/constants.py +54 -0
- teradataml/options/__init__.py +3 -6
- teradataml/options/configure.py +21 -20
- teradataml/scriptmgmt/UserEnv.py +61 -5
- teradataml/scriptmgmt/lls_utils.py +135 -53
- teradataml/table_operators/Apply.py +38 -6
- teradataml/table_operators/Script.py +45 -308
- teradataml/table_operators/TableOperator.py +182 -591
- teradataml/table_operators/__init__.py +0 -1
- teradataml/table_operators/table_operator_util.py +32 -40
- teradataml/utils/validators.py +127 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
- teradataml/analytics/mle/AdaBoost.py +0 -651
- teradataml/analytics/mle/AdaBoostPredict.py +0 -564
- teradataml/analytics/mle/Antiselect.py +0 -342
- teradataml/analytics/mle/Arima.py +0 -641
- teradataml/analytics/mle/ArimaPredict.py +0 -477
- teradataml/analytics/mle/Attribution.py +0 -1070
- teradataml/analytics/mle/Betweenness.py +0 -658
- teradataml/analytics/mle/Burst.py +0 -711
- teradataml/analytics/mle/CCM.py +0 -600
- teradataml/analytics/mle/CCMPrepare.py +0 -324
- teradataml/analytics/mle/CFilter.py +0 -460
- teradataml/analytics/mle/ChangePointDetection.py +0 -572
- teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
- teradataml/analytics/mle/Closeness.py +0 -737
- teradataml/analytics/mle/ConfusionMatrix.py +0 -420
- teradataml/analytics/mle/Correlation.py +0 -477
- teradataml/analytics/mle/Correlation2.py +0 -573
- teradataml/analytics/mle/CoxHazardRatio.py +0 -679
- teradataml/analytics/mle/CoxPH.py +0 -556
- teradataml/analytics/mle/CoxSurvival.py +0 -478
- teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
- teradataml/analytics/mle/DTW.py +0 -623
- teradataml/analytics/mle/DWT.py +0 -564
- teradataml/analytics/mle/DWT2D.py +0 -599
- teradataml/analytics/mle/DecisionForest.py +0 -716
- teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
- teradataml/analytics/mle/DecisionForestPredict.py +0 -561
- teradataml/analytics/mle/DecisionTree.py +0 -830
- teradataml/analytics/mle/DecisionTreePredict.py +0 -528
- teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
- teradataml/analytics/mle/FMeasure.py +0 -402
- teradataml/analytics/mle/FPGrowth.py +0 -734
- teradataml/analytics/mle/FrequentPaths.py +0 -695
- teradataml/analytics/mle/GLM.py +0 -558
- teradataml/analytics/mle/GLML1L2.py +0 -547
- teradataml/analytics/mle/GLML1L2Predict.py +0 -519
- teradataml/analytics/mle/GLMPredict.py +0 -529
- teradataml/analytics/mle/HMMDecoder.py +0 -945
- teradataml/analytics/mle/HMMEvaluator.py +0 -901
- teradataml/analytics/mle/HMMSupervised.py +0 -521
- teradataml/analytics/mle/HMMUnsupervised.py +0 -572
- teradataml/analytics/mle/Histogram.py +0 -561
- teradataml/analytics/mle/IDWT.py +0 -476
- teradataml/analytics/mle/IDWT2D.py +0 -493
- teradataml/analytics/mle/IdentityMatch.py +0 -763
- teradataml/analytics/mle/Interpolator.py +0 -918
- teradataml/analytics/mle/KMeans.py +0 -485
- teradataml/analytics/mle/KNN.py +0 -627
- teradataml/analytics/mle/KNNRecommender.py +0 -488
- teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
- teradataml/analytics/mle/LAR.py +0 -439
- teradataml/analytics/mle/LARPredict.py +0 -478
- teradataml/analytics/mle/LDA.py +0 -548
- teradataml/analytics/mle/LDAInference.py +0 -492
- teradataml/analytics/mle/LDATopicSummary.py +0 -464
- teradataml/analytics/mle/LevenshteinDistance.py +0 -450
- teradataml/analytics/mle/LinReg.py +0 -433
- teradataml/analytics/mle/LinRegPredict.py +0 -438
- teradataml/analytics/mle/MinHash.py +0 -544
- teradataml/analytics/mle/Modularity.py +0 -587
- teradataml/analytics/mle/NEREvaluator.py +0 -410
- teradataml/analytics/mle/NERExtractor.py +0 -595
- teradataml/analytics/mle/NERTrainer.py +0 -458
- teradataml/analytics/mle/NGrams.py +0 -570
- teradataml/analytics/mle/NPath.py +0 -634
- teradataml/analytics/mle/NTree.py +0 -549
- teradataml/analytics/mle/NaiveBayes.py +0 -462
- teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
- teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
- teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
- teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
- teradataml/analytics/mle/NamedEntityFinder.py +0 -529
- teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
- teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
- teradataml/analytics/mle/POSTagger.py +0 -417
- teradataml/analytics/mle/Pack.py +0 -411
- teradataml/analytics/mle/PageRank.py +0 -535
- teradataml/analytics/mle/PathAnalyzer.py +0 -426
- teradataml/analytics/mle/PathGenerator.py +0 -367
- teradataml/analytics/mle/PathStart.py +0 -464
- teradataml/analytics/mle/PathSummarizer.py +0 -470
- teradataml/analytics/mle/Pivot.py +0 -471
- teradataml/analytics/mle/ROC.py +0 -425
- teradataml/analytics/mle/RandomSample.py +0 -637
- teradataml/analytics/mle/RandomWalkSample.py +0 -490
- teradataml/analytics/mle/SAX.py +0 -779
- teradataml/analytics/mle/SVMDense.py +0 -677
- teradataml/analytics/mle/SVMDensePredict.py +0 -536
- teradataml/analytics/mle/SVMDenseSummary.py +0 -437
- teradataml/analytics/mle/SVMSparse.py +0 -557
- teradataml/analytics/mle/SVMSparsePredict.py +0 -553
- teradataml/analytics/mle/SVMSparseSummary.py +0 -435
- teradataml/analytics/mle/Sampling.py +0 -549
- teradataml/analytics/mle/Scale.py +0 -565
- teradataml/analytics/mle/ScaleByPartition.py +0 -496
- teradataml/analytics/mle/ScaleMap.py +0 -378
- teradataml/analytics/mle/ScaleSummary.py +0 -320
- teradataml/analytics/mle/SentenceExtractor.py +0 -363
- teradataml/analytics/mle/SentimentEvaluator.py +0 -432
- teradataml/analytics/mle/SentimentExtractor.py +0 -578
- teradataml/analytics/mle/SentimentTrainer.py +0 -405
- teradataml/analytics/mle/SeriesSplitter.py +0 -641
- teradataml/analytics/mle/Sessionize.py +0 -475
- teradataml/analytics/mle/SimpleMovAvg.py +0 -397
- teradataml/analytics/mle/StringSimilarity.py +0 -425
- teradataml/analytics/mle/TF.py +0 -389
- teradataml/analytics/mle/TFIDF.py +0 -504
- teradataml/analytics/mle/TextChunker.py +0 -414
- teradataml/analytics/mle/TextClassifier.py +0 -399
- teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
- teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
- teradataml/analytics/mle/TextMorph.py +0 -494
- teradataml/analytics/mle/TextParser.py +0 -623
- teradataml/analytics/mle/TextTagger.py +0 -530
- teradataml/analytics/mle/TextTokenizer.py +0 -502
- teradataml/analytics/mle/UnivariateStatistics.py +0 -488
- teradataml/analytics/mle/Unpack.py +0 -526
- teradataml/analytics/mle/Unpivot.py +0 -438
- teradataml/analytics/mle/VarMax.py +0 -776
- teradataml/analytics/mle/VectorDistance.py +0 -762
- teradataml/analytics/mle/WeightedMovAvg.py +0 -400
- teradataml/analytics/mle/XGBoost.py +0 -842
- teradataml/analytics/mle/XGBoostPredict.py +0 -627
- teradataml/analytics/mle/__init__.py +0 -123
- teradataml/analytics/mle/json/adaboost_mle.json +0 -135
- teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
- teradataml/analytics/mle/json/antiselect_mle.json +0 -34
- teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
- teradataml/analytics/mle/json/arima_mle.json +0 -172
- teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
- teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
- teradataml/analytics/mle/json/betweenness_mle.json +0 -97
- teradataml/analytics/mle/json/burst_mle.json +0 -140
- teradataml/analytics/mle/json/ccm_mle.json +0 -124
- teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
- teradataml/analytics/mle/json/cfilter_mle.json +0 -93
- teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
- teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
- teradataml/analytics/mle/json/closeness_mle.json +0 -104
- teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
- teradataml/analytics/mle/json/correlation_mle.json +0 -86
- teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
- teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
- teradataml/analytics/mle/json/coxph_mle.json +0 -98
- teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
- teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
- teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
- teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
- teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
- teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
- teradataml/analytics/mle/json/dtw_mle.json +0 -97
- teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
- teradataml/analytics/mle/json/dwt_mle.json +0 -101
- teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
- teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
- teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
- teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
- teradataml/analytics/mle/json/glm_mle.json +0 -111
- teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
- teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
- teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/histogram_mle.json +0 -100
- teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
- teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
- teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
- teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
- teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
- teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
- teradataml/analytics/mle/json/idwt_mle.json +0 -66
- teradataml/analytics/mle/json/interpolator_mle.json +0 -151
- teradataml/analytics/mle/json/kmeans_mle.json +0 -97
- teradataml/analytics/mle/json/knn_mle.json +0 -141
- teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
- teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
- teradataml/analytics/mle/json/lar_mle.json +0 -78
- teradataml/analytics/mle/json/larpredict_mle.json +0 -69
- teradataml/analytics/mle/json/lda_mle.json +0 -130
- teradataml/analytics/mle/json/ldainference_mle.json +0 -78
- teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
- teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
- teradataml/analytics/mle/json/linreg_mle.json +0 -42
- teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
- teradataml/analytics/mle/json/minhash_mle.json +0 -113
- teradataml/analytics/mle/json/modularity_mle.json +0 -91
- teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
- teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
- teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
- teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
- teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
- teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
- teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
- teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
- teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
- teradataml/analytics/mle/json/ngrams_mle.json +0 -137
- teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
- teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
- teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
- teradataml/analytics/mle/json/pack_mle.json +0 -58
- teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
- teradataml/analytics/mle/json/pagerank_mle.json +0 -81
- teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
- teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
- teradataml/analytics/mle/json/pathstart_mle.json +0 -62
- teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
- teradataml/analytics/mle/json/pivoting_mle.json +0 -71
- teradataml/analytics/mle/json/postagger_mle.json +0 -51
- teradataml/analytics/mle/json/randomsample_mle.json +0 -131
- teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
- teradataml/analytics/mle/json/roc_mle.json +0 -73
- teradataml/analytics/mle/json/sampling_mle.json +0 -75
- teradataml/analytics/mle/json/sax_mle.json +0 -154
- teradataml/analytics/mle/json/scale_mle.json +0 -93
- teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
- teradataml/analytics/mle/json/scalemap_mle.json +0 -44
- teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
- teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
- teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
- teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
- teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
- teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
- teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
- teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
- teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
- teradataml/analytics/mle/json/svmdense_mle.json +0 -165
- teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
- teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
- teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
- teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
- teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
- teradataml/analytics/mle/json/textchunker_mle.json +0 -40
- teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
- teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
- teradataml/analytics/mle/json/textmorph_mle.json +0 -63
- teradataml/analytics/mle/json/textparser_mle.json +0 -166
- teradataml/analytics/mle/json/texttagger_mle.json +0 -81
- teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
- teradataml/analytics/mle/json/tf_mle.json +0 -33
- teradataml/analytics/mle/json/tfidf_mle.json +0 -34
- teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
- teradataml/analytics/mle/json/unpack_mle.json +0 -91
- teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
- teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
- teradataml/analytics/mle/json/varmax_mle.json +0 -176
- teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
- teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
- teradataml/analytics/mle/json/xgboost_mle.json +0 -178
- teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
- teradataml/analytics/sqle/Antiselect.py +0 -321
- teradataml/analytics/sqle/Attribution.py +0 -603
- teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
- teradataml/analytics/sqle/GLMPredict.py +0 -430
- teradataml/analytics/sqle/MovingAverage.py +0 -543
- teradataml/analytics/sqle/NGramSplitter.py +0 -548
- teradataml/analytics/sqle/NPath.py +0 -632
- teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
- teradataml/analytics/sqle/Pack.py +0 -388
- teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
- teradataml/analytics/sqle/Sessionize.py +0 -390
- teradataml/analytics/sqle/StringSimilarity.py +0 -400
- teradataml/analytics/sqle/Unpack.py +0 -503
- teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
- teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
- teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
- teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
- teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
- teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
- teradataml/analytics/sqle/json/npath_sqle.json +0 -67
- teradataml/analytics/sqle/json/pack_sqle.json +0 -47
- teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
- teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
- teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
- teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
- teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
- teradataml/catalog/model_cataloging.py +0 -980
- teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
- teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
- teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
- teradataml/table_operators/sandbox_container_util.py +0 -643
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
|
@@ -308,91 +308,6 @@ def __check_if_client_specific_use(key, function_arg_map, is_sql_name=False):
|
|
|
308
308
|
else:
|
|
309
309
|
return False, None
|
|
310
310
|
|
|
311
|
-
|
|
312
|
-
def __get_model_parameters(model, function_arg_map):
|
|
313
|
-
"""
|
|
314
|
-
DESCRIPTION:
|
|
315
|
-
Internal function to get parameter information of the model to be saved.
|
|
316
|
-
|
|
317
|
-
PARAMETERS:
|
|
318
|
-
model:
|
|
319
|
-
Required argument.
|
|
320
|
-
The model (analytic function object instance) to be saved.
|
|
321
|
-
Types: teradataml Analytic Function object.
|
|
322
|
-
|
|
323
|
-
function_arg_map:
|
|
324
|
-
Required argument.
|
|
325
|
-
The teradataml-sql map for the function obtained using function_argument_mapper.
|
|
326
|
-
Types: dict
|
|
327
|
-
|
|
328
|
-
RETURNS:
|
|
329
|
-
A dict containing the information about parameters passed to model.
|
|
330
|
-
|
|
331
|
-
EXAMPLES:
|
|
332
|
-
>>> model_parameters = __get_model_parameters(model, function_arg_map)
|
|
333
|
-
"""
|
|
334
|
-
parameter_json = {}
|
|
335
|
-
|
|
336
|
-
# Get the attributes that are specific to the SQL syntax of the model algorithm
|
|
337
|
-
sql_specific_attributes = model._get_sql_specific_attributes()
|
|
338
|
-
|
|
339
|
-
# First, let's identify the parameters
|
|
340
|
-
nonsql_argument_counter = 1
|
|
341
|
-
for key in model.__dict__:
|
|
342
|
-
if not key.startswith('_'):
|
|
343
|
-
member = getattr(model, key)
|
|
344
|
-
# Check if this is an attribute, not a DataFrame
|
|
345
|
-
if not isinstance(member, DataFrame) and key != "sqlmr_query":
|
|
346
|
-
# Check if it is a special or client specific argument
|
|
347
|
-
special_use, used_in = __check_if_client_specific_use(key, function_arg_map)
|
|
348
|
-
|
|
349
|
-
value = member
|
|
350
|
-
# Add quotes to Boolean values as they tend to be handled in unintended way with JSON.
|
|
351
|
-
if type(member) == bool or key == famc.TDML_FORMULA_NAME.value:
|
|
352
|
-
value = str(member)
|
|
353
|
-
else:
|
|
354
|
-
if isinstance(member, list):
|
|
355
|
-
# We try to save the list as a string representation that could readily be used,
|
|
356
|
-
# in SQL, and has no language specific representation.
|
|
357
|
-
# Here, we remove the '[' and ']' from the string representation.
|
|
358
|
-
# We also avoid adding quotes around single-item list.
|
|
359
|
-
if len(member) == 1:
|
|
360
|
-
value = str(member[0]) if type(member[0]) == bool else member[0]
|
|
361
|
-
elif len(member) > 1:
|
|
362
|
-
if type(member[0]) == bool:
|
|
363
|
-
member = ['{}'.format(val) for val in member]
|
|
364
|
-
value = str(member).lstrip('[').rstrip(']')
|
|
365
|
-
else:
|
|
366
|
-
# Empty list has no meaning, but no chance of running into this with the validation
|
|
367
|
-
# in the function wrappers.
|
|
368
|
-
value = None
|
|
369
|
-
if value is not None:
|
|
370
|
-
if special_use:
|
|
371
|
-
sql_name = '__nonsql_argument_{}__'.format(nonsql_argument_counter)
|
|
372
|
-
nonsql_argument_counter = nonsql_argument_counter + 1
|
|
373
|
-
else:
|
|
374
|
-
sql_name = __get_arg_sql_name_from_tdml(function_arg_map,arg_type=famc.ARGUMENTS.value,name=key)
|
|
375
|
-
parameter_json[sql_name] = {}
|
|
376
|
-
parameter_json[sql_name]["value"] = value
|
|
377
|
-
parameter_json[sql_name]["client_specific_name"] = key
|
|
378
|
-
|
|
379
|
-
sql_name = '__nonsql_argument_{}__'.format(nonsql_argument_counter)
|
|
380
|
-
parameter_json[sql_name] = {}
|
|
381
|
-
parameter_json[sql_name]["value"] = model.__class__.__name__
|
|
382
|
-
parameter_json[sql_name]["client_specific_name"] = "__class_name__"
|
|
383
|
-
|
|
384
|
-
# Add the SQL specific arguments
|
|
385
|
-
for sql_name in sql_specific_attributes:
|
|
386
|
-
parameter_json[sql_name] = {}
|
|
387
|
-
parameter_json[sql_name]["value"] = sql_specific_attributes[sql_name]
|
|
388
|
-
# Also save the formula related property names for corresponding SQL arguments
|
|
389
|
-
if hasattr(model, '_sql_formula_attribute_mapper'):
|
|
390
|
-
if sql_name in model._sql_formula_attribute_mapper:
|
|
391
|
-
parameter_json[sql_name]["client_specific_name"] = model._sql_formula_attribute_mapper[sql_name]
|
|
392
|
-
|
|
393
|
-
return parameter_json
|
|
394
|
-
|
|
395
|
-
|
|
396
311
|
def __check_if_model_exists(name, created=False, accessible=False,
|
|
397
312
|
raise_error_if_exists=False, raise_error_if_model_not_found=False):
|
|
398
313
|
"""
|
|
@@ -477,89 +392,6 @@ def __check_if_model_exists(name, created=False, accessible=False,
|
|
|
477
392
|
name, ' or not created by user'),
|
|
478
393
|
MessageCodes.MODEL_NOT_FOUND)
|
|
479
394
|
|
|
480
|
-
|
|
481
|
-
def __check_if_model_cataloging_tables_exists(raise_error_if_does_not_exists=True):
|
|
482
|
-
"""
|
|
483
|
-
DESCRIPTION:
|
|
484
|
-
Check whether Model Cataloging tables (one of the views - ModelCataloging.ModelsV) exists or not.
|
|
485
|
-
|
|
486
|
-
PARAMETERS:
|
|
487
|
-
raise_error_if_does_not_exists:
|
|
488
|
-
Optional Argument.
|
|
489
|
-
Specifies the flag to decide whether to raise error when Model Cataloging tables does not exist.
|
|
490
|
-
Default Value: True (Raise exception)
|
|
491
|
-
Types: bool
|
|
492
|
-
|
|
493
|
-
RAISES:
|
|
494
|
-
None.
|
|
495
|
-
|
|
496
|
-
RETURNS:
|
|
497
|
-
True, if the view exists, else False.
|
|
498
|
-
|
|
499
|
-
EXAMPLES:
|
|
500
|
-
>>> __check_if_model_cataloging_tables_exists()
|
|
501
|
-
"""
|
|
502
|
-
# Get current connection().
|
|
503
|
-
conn = get_connection()
|
|
504
|
-
|
|
505
|
-
# Check whether tables exists on the system or not.
|
|
506
|
-
model_table_exists = conn.dialect.has_view(conn, view_name=mac.MODELS.value,
|
|
507
|
-
schema=mac.MODEL_CATALOG_DB.value)
|
|
508
|
-
|
|
509
|
-
# If both tables exist, return True.
|
|
510
|
-
if model_table_exists:
|
|
511
|
-
return True
|
|
512
|
-
|
|
513
|
-
# We are here means the Model Cataloging view does not exist.
|
|
514
|
-
# Let's raise error if 'raise_error_if_does_not_exists' set to True.
|
|
515
|
-
if raise_error_if_does_not_exists:
|
|
516
|
-
# Raise error, as one or both Model Cataloging tables does not exist.
|
|
517
|
-
# MODEL_CATALOGING_TABLE_DOES_EXIST
|
|
518
|
-
raise TeradataMlException(
|
|
519
|
-
Messages.get_message(MessageCodes.MODEL_CATALOGING_TABLE_DOES_EXIST),
|
|
520
|
-
MessageCodes.MODEL_CATALOGING_TABLE_DOES_EXIST)
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
def __get_tables_for_model(name, current_user):
|
|
524
|
-
"""
|
|
525
|
-
DESCRIPTION:
|
|
526
|
-
Function to get model tables for a given model name.
|
|
527
|
-
|
|
528
|
-
PARAMETERS:
|
|
529
|
-
name:
|
|
530
|
-
Required Argument.
|
|
531
|
-
Specifies the name of the model to get the model tables for.
|
|
532
|
-
Types: str
|
|
533
|
-
|
|
534
|
-
current_user:
|
|
535
|
-
Required Argument.
|
|
536
|
-
Specifies the name of the current Vantage user.
|
|
537
|
-
Types: str
|
|
538
|
-
|
|
539
|
-
RETURNS:
|
|
540
|
-
A list of model tables associated with the model.
|
|
541
|
-
|
|
542
|
-
EXAMPLES:
|
|
543
|
-
>>> table_list = __get_tables_for_model(name, current_user)
|
|
544
|
-
"""
|
|
545
|
-
# Get list of tables
|
|
546
|
-
model_object_info = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_OBJECTS.value))
|
|
547
|
-
model_info = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELSX.value))
|
|
548
|
-
model_info = model_info[model_info[mac.CREATED_BY.value].str.lower() == current_user.lower()]
|
|
549
|
-
model_info = model_info[model_info[mac.MODEL_NAME.value] == name]
|
|
550
|
-
model_objects_to_publish = model_info.join(model_object_info,
|
|
551
|
-
on=[model_info.Name == model_object_info.ModelName],
|
|
552
|
-
how='inner').select([mac.MODEL_OBJ_TABLE_NAME.value])
|
|
553
|
-
|
|
554
|
-
model_objects_to_publish = model_objects_to_publish.to_pandas().squeeze()
|
|
555
|
-
if isinstance(model_objects_to_publish, str):
|
|
556
|
-
# If there is only one output table
|
|
557
|
-
return [model_objects_to_publish]
|
|
558
|
-
else:
|
|
559
|
-
# For multiple or no output tables
|
|
560
|
-
return model_objects_to_publish.tolist()
|
|
561
|
-
|
|
562
|
-
|
|
563
395
|
def __get_current_user(conn=None):
|
|
564
396
|
"""
|
|
565
397
|
DESCRIPTION:
|
|
@@ -619,36 +451,6 @@ def __get_like_filter_expression_on_col(metaexpr, column_name, like):
|
|
|
619
451
|
return metaexpr._filter(0, 'like', [column_name], like = like, match_arg='i')
|
|
620
452
|
|
|
621
453
|
|
|
622
|
-
def __get_model_engine(model):
|
|
623
|
-
"""
|
|
624
|
-
DESCRIPTION:
|
|
625
|
-
Internal function to return the engine name on which the model was generated.
|
|
626
|
-
|
|
627
|
-
PARAMETERS:
|
|
628
|
-
model:
|
|
629
|
-
Required Argument.
|
|
630
|
-
Model object, for which engine is to be found.
|
|
631
|
-
Types: str
|
|
632
|
-
|
|
633
|
-
RETURNS:
|
|
634
|
-
Engine name ('ML Engine' or 'Advanced SQL Engine')
|
|
635
|
-
|
|
636
|
-
RAISES:
|
|
637
|
-
TeradataMlException
|
|
638
|
-
|
|
639
|
-
EXAMPLES:
|
|
640
|
-
>>> __get_model_engine(model)
|
|
641
|
-
"""
|
|
642
|
-
if ".mle." in str(type(model)):
|
|
643
|
-
return mac.MODEL_ENGINE_ML.value
|
|
644
|
-
elif ".sqle." in str(type(model)):
|
|
645
|
-
return mac.MODEL_ENGINE_ADVSQL.value
|
|
646
|
-
else:
|
|
647
|
-
raise TeradataMlException(Messages.get_message(MessageCodes.UNKNOWN_MODEL_ENGINE,
|
|
648
|
-
str(type(model))),
|
|
649
|
-
MessageCodes.UNKNOWN_MODEL_ENGINE)
|
|
650
|
-
|
|
651
|
-
|
|
652
454
|
def __get_wrapper_class(model_engine, model_class):
|
|
653
455
|
"""
|
|
654
456
|
DESCRIPTION:
|
|
@@ -674,11 +476,9 @@ def __get_wrapper_class(model_engine, model_class):
|
|
|
674
476
|
AttributeError - When model_class wrapper function, does is not from model_engine.
|
|
675
477
|
|
|
676
478
|
EXAMPLES:
|
|
677
|
-
>>> __get_wrapper_class("
|
|
479
|
+
>>> __get_wrapper_class("SQL Engine", "GLM")
|
|
678
480
|
"""
|
|
679
|
-
if model_engine == mac.
|
|
680
|
-
module_name = "teradataml.analytics.mle"
|
|
681
|
-
elif model_engine == mac.MODEL_ENGINE_ADVSQL.value:
|
|
481
|
+
if model_engine == mac.MODEL_ENGINE_ADVSQL.value:
|
|
682
482
|
module_name = "teradataml.analytics.sqle"
|
|
683
483
|
else:
|
|
684
484
|
raise ValueError("Invalid Engine found in Model Cataloging table.")
|
|
@@ -688,823 +488,4 @@ def __get_wrapper_class(model_engine, model_class):
|
|
|
688
488
|
return getattr(wrapper_module, model_class)
|
|
689
489
|
|
|
690
490
|
|
|
691
|
-
def __is_view(tablename):
|
|
692
|
-
"""
|
|
693
|
-
DESCRIPTION:
|
|
694
|
-
Internal function to check whether the object is view or not.
|
|
695
|
-
|
|
696
|
-
PARAMETERS:
|
|
697
|
-
tablename:
|
|
698
|
-
Required Argument.
|
|
699
|
-
Table name or view name to be checked.
|
|
700
|
-
Types: str
|
|
701
|
-
|
|
702
|
-
RAISES:
|
|
703
|
-
None.
|
|
704
|
-
|
|
705
|
-
RETURNS:
|
|
706
|
-
True when the tablename is view, else false.
|
|
707
|
-
|
|
708
|
-
EXAMPLES:
|
|
709
|
-
>>> __is_view('"dbaname"."tablename"')
|
|
710
|
-
"""
|
|
711
|
-
db_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_db_name(tablename), "\"")
|
|
712
|
-
table_view_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_table_name(tablename), "\"")
|
|
713
|
-
query = SQLBundle._build_select_table_kind(db_name, "{0}".format(table_view_name), "'V'")
|
|
714
|
-
|
|
715
|
-
pdf = pd.read_sql(query, get_context())
|
|
716
|
-
if pdf.shape[0] > 0:
|
|
717
|
-
return True
|
|
718
|
-
else:
|
|
719
|
-
return False
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
def __delete_model_tableview(tableviewname):
|
|
723
|
-
"""
|
|
724
|
-
DESCRIPTION:
|
|
725
|
-
Internal function to remove table name or view.
|
|
726
|
-
|
|
727
|
-
PARAMETERS:
|
|
728
|
-
tableviewname:
|
|
729
|
-
Required Argument.
|
|
730
|
-
Table name or view name to be deleted.
|
|
731
|
-
Types: str
|
|
732
|
-
|
|
733
|
-
RAISES:
|
|
734
|
-
None.
|
|
735
|
-
|
|
736
|
-
RETURNS:
|
|
737
|
-
bool
|
|
738
|
-
|
|
739
|
-
EXAMPLES:
|
|
740
|
-
>>> __delete_model_tableview('"dbname"."tableviewname"')
|
|
741
|
-
"""
|
|
742
|
-
if not __is_view(tableviewname):
|
|
743
|
-
try:
|
|
744
|
-
UtilFuncs._drop_table(tableviewname)
|
|
745
|
-
except:
|
|
746
|
-
return False
|
|
747
|
-
else:
|
|
748
|
-
try:
|
|
749
|
-
UtilFuncs._drop_view(tableviewname)
|
|
750
|
-
except:
|
|
751
|
-
return False
|
|
752
|
-
|
|
753
|
-
return True
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
def __get_all_formula_related_args(function_arg_map):
|
|
757
|
-
"""
|
|
758
|
-
DESCRIPTION:
|
|
759
|
-
Internal function to find a list of all formula related arguments for a function.
|
|
760
|
-
|
|
761
|
-
PARAMETERS:
|
|
762
|
-
function_arg_map:
|
|
763
|
-
Required Argument.
|
|
764
|
-
The teradataml-sql map for the function obtained using function_argument_mapper.
|
|
765
|
-
Types: dict
|
|
766
|
-
|
|
767
|
-
RETURNS:
|
|
768
|
-
A dictionary mapping all SQL Arguments for the function related to formula to its role in formula.
|
|
769
|
-
|
|
770
|
-
EXAMPLE:
|
|
771
|
-
>>> __get_all_formula_related_args(function_arg_map)
|
|
772
|
-
"""
|
|
773
|
-
formula_args = {}
|
|
774
|
-
for arg_name in function_arg_map[famc.ARGUMENTS.value][famc.SQL_TO_TDML.value]:
|
|
775
|
-
arg = function_arg_map[famc.ARGUMENTS.value][famc.SQL_TO_TDML.value][arg_name]
|
|
776
|
-
# Ignore alternate names
|
|
777
|
-
if famc.ALTERNATE_TO.value in arg:
|
|
778
|
-
alternate_name = arg[famc.ALTERNATE_TO.value]
|
|
779
|
-
arg = function_arg_map[famc.ARGUMENTS.value][famc.SQL_TO_TDML.value][alternate_name]
|
|
780
|
-
|
|
781
|
-
if famc.USED_IN_FORMULA.value in arg:
|
|
782
|
-
formula_args[arg_name] = {}
|
|
783
|
-
formula_args[arg_name][famc.USED_IN_FORMULA.value] = arg[famc.USED_IN_FORMULA.value]
|
|
784
|
-
formula_args[arg_name]['arg_value'] = None
|
|
785
|
-
|
|
786
|
-
return formula_args
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
def __fix_imbalanced_quotes(arg):
|
|
790
|
-
"""
|
|
791
|
-
DESCRIPTION:
|
|
792
|
-
Internal function to fix imbalanced quotes around a string.
|
|
793
|
-
|
|
794
|
-
PARAMETERS:
|
|
795
|
-
arg:
|
|
796
|
-
Required Argument.
|
|
797
|
-
The string to fix the imbalanced quotes for, if any.
|
|
798
|
-
Types: str
|
|
799
|
-
|
|
800
|
-
RETURNS:
|
|
801
|
-
The input string with any imbalanced quotes stripped.
|
|
802
|
-
|
|
803
|
-
EXAMPLE:
|
|
804
|
-
>>> __fix_imbalanced_quotes('hello"')
|
|
805
|
-
hello
|
|
806
|
-
"""
|
|
807
|
-
for quote in ["'", '"']:
|
|
808
|
-
if (arg.startswith(quote) and not arg.endswith(quote)) or (not arg.startswith(quote) and arg.endswith(quote)):
|
|
809
|
-
return arg.strip(quote)
|
|
810
|
-
|
|
811
|
-
return arg
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
def __get_tdml_parameter_value_for_sequence(function_arg_map, attr_value):
|
|
815
|
-
"""
|
|
816
|
-
DESCRIPTION:
|
|
817
|
-
Internal function to form sequence_column teradataml argument from SQL arguments.
|
|
818
|
-
|
|
819
|
-
PARAMETERS:
|
|
820
|
-
function_arg_map:
|
|
821
|
-
Required Argument.
|
|
822
|
-
The teradataml-sql map for the function obtained using function_argument_mapper.
|
|
823
|
-
Types: dict
|
|
824
|
-
|
|
825
|
-
attr_value:
|
|
826
|
-
Required Argument.
|
|
827
|
-
The value of the SQL sequence argument.
|
|
828
|
-
|
|
829
|
-
RETURNS:
|
|
830
|
-
A dictionary mapping the teradataml sequence argument to its values.
|
|
831
|
-
|
|
832
|
-
EXAMPLES:
|
|
833
|
-
>>> tdml_sequence_args = __get_tdml_parameter_value_for_sequence(function_arg_map, sql_sequence_arg)
|
|
834
|
-
"""
|
|
835
|
-
sequence_dict = {}
|
|
836
|
-
tdml_name = None
|
|
837
|
-
for column in attr_value.split(','):
|
|
838
|
-
if len(column) == 0:
|
|
839
|
-
continue
|
|
840
|
-
if ':' in column:
|
|
841
|
-
input_name, col_val = column.split(':')
|
|
842
|
-
input_name = __fix_imbalanced_quotes(input_name)
|
|
843
|
-
col_val = __fix_imbalanced_quotes(col_val)
|
|
844
|
-
tdml_name = '{}_{}'.format(__get_arg_tdml_name_from_sql(function_arg_map, famc.INPUTS.value,
|
|
845
|
-
input_name.lower()),
|
|
846
|
-
'sequence_column')
|
|
847
|
-
tdml_name = __fix_imbalanced_quotes(tdml_name)
|
|
848
|
-
sequence_dict[tdml_name] = [col_val]
|
|
849
|
-
else:
|
|
850
|
-
if tdml_name not in sequence_dict:
|
|
851
|
-
# This means there is only one input and the input name was not specified in the
|
|
852
|
-
# SequenceInputBy clause. So we get the only input name.
|
|
853
|
-
tdml_name = list(function_arg_map[famc.INPUTS.value][famc.TDML_TO_SQL.value].keys())[0]
|
|
854
|
-
tdml_name = '{}_{}'.format(tdml_name, 'sequence_column')
|
|
855
|
-
sequence_dict[tdml_name] = []
|
|
856
|
-
column = __fix_imbalanced_quotes(column)
|
|
857
|
-
sequence_dict[tdml_name].append(column)
|
|
858
|
-
|
|
859
|
-
return sequence_dict
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
def __get_target_column(name):
|
|
863
|
-
"""
|
|
864
|
-
DESCRIPTION:
|
|
865
|
-
Internal function to get the target column of a saved model.
|
|
866
|
-
|
|
867
|
-
PARAMETERS:
|
|
868
|
-
name:
|
|
869
|
-
Required Argument.
|
|
870
|
-
Specifies the name used to save the model.
|
|
871
|
-
Types: str
|
|
872
|
-
|
|
873
|
-
RETURNS:
|
|
874
|
-
A String representing the name of the target column.
|
|
875
|
-
|
|
876
|
-
EXAMPLES:
|
|
877
|
-
>>> target_column = __get_target_column('GLMModel')
|
|
878
|
-
"""
|
|
879
|
-
model_details = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_DETAILSX.value))
|
|
880
|
-
model_details = model_details[model_details[mac.MODEL_DERIVED_NAME.value] == name]
|
|
881
|
-
target_column = model_details.select([mac.MODEL_DERIVED_TARGET_COLUMN.value]).squeeze()
|
|
882
|
-
return target_column
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
def __get_tdml_parameter_value_for_formula(formula_args, target_column):
|
|
886
|
-
"""
|
|
887
|
-
DESCRIPTION:
|
|
888
|
-
Internal function to build the formula argument based on the SQL equivalent inputs.
|
|
889
|
-
|
|
890
|
-
PARAMETERS:
|
|
891
|
-
formula_args:
|
|
892
|
-
Required Argument.
|
|
893
|
-
A dictionary mapping all SQL Arguments for the function related to formula to its role in formula.
|
|
894
|
-
Types: dict
|
|
895
|
-
|
|
896
|
-
target_column:
|
|
897
|
-
Required Argument.
|
|
898
|
-
The target column for the model, if any.
|
|
899
|
-
Types: str
|
|
900
|
-
|
|
901
|
-
RETURNS:
|
|
902
|
-
A String representing the formula argument to be used with teradataml.
|
|
903
|
-
|
|
904
|
-
EXAMPLES:
|
|
905
|
-
>>> formula = __get_tdml_parameter_value_for_formula(formula_args, target_column)
|
|
906
|
-
"""
|
|
907
|
-
dependent_var = target_column
|
|
908
|
-
all_vars = []
|
|
909
|
-
|
|
910
|
-
for arg in formula_args:
|
|
911
|
-
if formula_args[arg]['arg_value'] is not None:
|
|
912
|
-
if formula_args[arg][famc.USED_IN_FORMULA.value] == famc.DEPENDENT_ATTR.value:
|
|
913
|
-
dependent_var = formula_args[arg]['arg_value'].strip("'")
|
|
914
|
-
else:
|
|
915
|
-
all_vars.extend(formula_args[arg]['arg_value'].split(','))
|
|
916
|
-
|
|
917
|
-
# Remove duplicates
|
|
918
|
-
all_vars = list(set(all_vars))
|
|
919
|
-
all_vars = [var.strip("'") for var in all_vars]
|
|
920
|
-
|
|
921
|
-
# Remove dependent variable if it occurs in all_vars
|
|
922
|
-
if dependent_var in all_vars:
|
|
923
|
-
all_vars.pop(all_vars.index(dependent_var))
|
|
924
|
-
|
|
925
|
-
formula = '{} ~ {}'.format(dependent_var, ' + '. join(all_vars))
|
|
926
|
-
return formula
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
def __cast_arg_values_to_tdml_types(value, type_):
|
|
930
|
-
"""
|
|
931
|
-
DESCRIPTION:
|
|
932
|
-
Internal function used by retrieve_model() to cast the retrieved model parameters to the expected python types.
|
|
933
|
-
|
|
934
|
-
PARAMETERS:
|
|
935
|
-
value:
|
|
936
|
-
Required Argument.
|
|
937
|
-
Specifies the value retrieved that needs a type cast.
|
|
938
|
-
Types: str
|
|
939
|
-
|
|
940
|
-
type_:
|
|
941
|
-
Required Argument.
|
|
942
|
-
Specifies the Python type the value needs to be cast to.
|
|
943
|
-
Type: Python type or tuple of Python types
|
|
944
|
-
|
|
945
|
-
RETURNS:
|
|
946
|
-
The value cast to the required Python type.
|
|
947
|
-
|
|
948
|
-
RAISES:
|
|
949
|
-
None
|
|
950
|
-
|
|
951
|
-
EXAMPLE:
|
|
952
|
-
>>> cast_value = __cast_arg_values_to_tdml_types('0.1', float)
|
|
953
|
-
"""
|
|
954
|
-
return_value = None
|
|
955
|
-
required_type = type_
|
|
956
|
-
|
|
957
|
-
accepted_bool_values = ['1', 't', 'true', 'y', 'yes']
|
|
958
|
-
|
|
959
|
-
# If the required_type is a tuple, we need to consider the possibility of the value being a list
|
|
960
|
-
if isinstance(required_type, tuple):
|
|
961
|
-
# The function_argument_mapper adds the type of the object in the list as the first value in the tuple
|
|
962
|
-
required_type = required_type[0]
|
|
963
|
-
|
|
964
|
-
# Use regex to split the string value into a list.
|
|
965
|
-
# This is required only when we expect the values to be a list as well, in which case,
|
|
966
|
-
# the 'value' will be a comma-separated list of strings.
|
|
967
|
-
# The pattern matches anything but whitespace and comma and not in quotes, or anything in quotes,
|
|
968
|
-
# basically avoiding splitting on a comma when surrounded by quotes.
|
|
969
|
-
pattern = r"[^',\s]+|'[^']*'"
|
|
970
|
-
values = re.findall(pattern, value)
|
|
971
|
-
if len(values) > 1:
|
|
972
|
-
if required_type == bool:
|
|
973
|
-
# Remove the quotes surrounding items in a list,
|
|
974
|
-
# and check for their presence in the acceptable TRUE values.
|
|
975
|
-
return_value = [val.strip().strip("'").lower() in accepted_bool_values for val in values]
|
|
976
|
-
else:
|
|
977
|
-
# Remove the quotes surrounding items in a list cast them to the required type.
|
|
978
|
-
return_value = [required_type(val.strip().strip("'")) for val in values]
|
|
979
|
-
else:
|
|
980
|
-
value = values[0]
|
|
981
|
-
|
|
982
|
-
if return_value is None:
|
|
983
|
-
if required_type == bool:
|
|
984
|
-
# Remove the quotes surrounding the value,
|
|
985
|
-
# and check for their presence in the acceptable TRUE values.
|
|
986
|
-
return_value = value.strip().strip("'").lower() in accepted_bool_values
|
|
987
|
-
else:
|
|
988
|
-
# Remove the quotes surrounding the value and cast it to the required type.
|
|
989
|
-
return_value = required_type(value.strip().strip("'"))
|
|
990
|
-
|
|
991
|
-
return return_value
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
def __get_model_access(name):
|
|
995
|
-
"""
|
|
996
|
-
DESCRIPTION:
|
|
997
|
-
Internal function to get the current access level of a saved model.
|
|
998
|
-
|
|
999
|
-
PARAMETERS:
|
|
1000
|
-
name:
|
|
1001
|
-
Required Argument.
|
|
1002
|
-
Specifies the name of the saved model to get the access level for.
|
|
1003
|
-
Types: str
|
|
1004
|
-
|
|
1005
|
-
RAISES:
|
|
1006
|
-
None.
|
|
1007
|
-
|
|
1008
|
-
RETURNS:
|
|
1009
|
-
A String representing the access level of the saved model.
|
|
1010
|
-
|
|
1011
|
-
EXAMPLES:
|
|
1012
|
-
>>> __get_model_access('saved_glm_model')
|
|
1013
|
-
"""
|
|
1014
|
-
df = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_DETAILS.value))
|
|
1015
|
-
return df[df[mac.MODEL_DERIVED_NAME.value] == name].select([mac.MODEL_ACCESS.value]).squeeze()
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
def __get_tdml_type_for_tdml_arg(name, function_arg_map):
|
|
1019
|
-
"""
|
|
1020
|
-
DESCRIPTION:
|
|
1021
|
-
Internal function to get the Python type for the given teradataml model class attribute.
|
|
1022
|
-
|
|
1023
|
-
PARAMETERS:
|
|
1024
|
-
name:
|
|
1025
|
-
Required Argument.
|
|
1026
|
-
Specifies the teradataml name for the attribute to get the expected python type for.
|
|
1027
|
-
Types: str
|
|
1028
|
-
|
|
1029
|
-
function_arg_map:
|
|
1030
|
-
Required Argument.
|
|
1031
|
-
Specifies the teradataml-sql map for the function obtained using function_argument_mapper.
|
|
1032
|
-
Types: dict
|
|
1033
|
-
|
|
1034
|
-
RETURNS:
|
|
1035
|
-
Python type for the given teradataml model class attribute name.
|
|
1036
|
-
None when argument name not found.
|
|
1037
|
-
|
|
1038
|
-
RAISES:
|
|
1039
|
-
None
|
|
1040
|
-
|
|
1041
|
-
EXAMPLES:
|
|
1042
|
-
>>> from teradataml.catalog.function_argument_mapper import _argument_mapper
|
|
1043
|
-
>>> function_arg_map = _argument_mapper._get_function_map('ML Engine', 'glm')
|
|
1044
|
-
>>> tdml_type = __get_tdml_type_for_tdml_arg('linkfunction', function_arg_map)
|
|
1045
|
-
"""
|
|
1046
|
-
# Let's check if the function argument mapper has the information about the argument we are looking for.
|
|
1047
|
-
# If not, let's return None.
|
|
1048
|
-
if name not in function_arg_map[famc.ARGUMENTS.value][famc.TDML_TO_SQL.value]:
|
|
1049
|
-
return None
|
|
1050
|
-
|
|
1051
|
-
tdml_type = str
|
|
1052
|
-
sql_name = function_arg_map[famc.ARGUMENTS.value][famc.TDML_TO_SQL.value][name]
|
|
1053
|
-
|
|
1054
|
-
# We can ignore formula letting it default to str
|
|
1055
|
-
special_use, used_in = __check_if_client_specific_use(name, function_arg_map)
|
|
1056
|
-
if not special_use or used_in == famc.USED_IN_SEQUENCE_INPUT_BY.value:
|
|
1057
|
-
tdml_type = function_arg_map[famc.ARGUMENTS.value][famc.SQL_TO_TDML.value][sql_name][famc.TDML_TYPE.value]
|
|
1058
|
-
|
|
1059
|
-
return tdml_type
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
def __retrieve_model_class(name, model_client, function_arg_map):
|
|
1063
|
-
"""
|
|
1064
|
-
DESCRIPTION:
|
|
1065
|
-
Internal function to get the teradataml class used for generating model given it's name.
|
|
1066
|
-
|
|
1067
|
-
PARAMETERS:
|
|
1068
|
-
name:
|
|
1069
|
-
Optional Argument. Required when model was saved by teradataml.
|
|
1070
|
-
Specifies the name of the model to retrieve the model attributes and output information for.
|
|
1071
|
-
Types: str
|
|
1072
|
-
|
|
1073
|
-
model_client:
|
|
1074
|
-
Required Argument.
|
|
1075
|
-
Specified the name of the client used to generate the model.
|
|
1076
|
-
Types: str
|
|
1077
|
-
|
|
1078
|
-
function_arg_map:
|
|
1079
|
-
Optional Argument. Required when model was not saved by teradataml.
|
|
1080
|
-
Specifies the teradataml-sql map for the function obtained using function_argument_mapper.
|
|
1081
|
-
Types: dict
|
|
1082
|
-
|
|
1083
|
-
RETURNS:
|
|
1084
|
-
A String representing the teradataml class name corresponding to the model.
|
|
1085
|
-
|
|
1086
|
-
EXAMPLES:
|
|
1087
|
-
>>> model_class = __retrieve_model_class(name, model_client, function_arg_map)
|
|
1088
|
-
"""
|
|
1089
|
-
if model_client == mac.MODEL_TDML.value:
|
|
1090
|
-
# Create DF on top of ModelAttributesV view
|
|
1091
|
-
model_arguments = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_ATTRS.value))
|
|
1092
|
-
model_arguments = model_arguments[model_arguments[mac.MODEL_DERIVED_NAME.value] == name]
|
|
1093
|
-
model_class = model_arguments[model_arguments.ClientSpecificAttributeName.str.
|
|
1094
|
-
contains(mac.MODEL_CLIENT_CLASS_KEY.value) == 1].\
|
|
1095
|
-
select([mac.MODEL_ATTR_VALUE.value]).squeeze()
|
|
1096
|
-
else:
|
|
1097
|
-
model_class = function_arg_map[famc.FUNCTION_TDML_NAME.value]
|
|
1098
|
-
|
|
1099
|
-
return model_class
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
def __retrieve_model_client_engine_algorithm(name, return_details=False):
|
|
1103
|
-
"""
|
|
1104
|
-
DESCRIPTION:
|
|
1105
|
-
Internal function to get the the model generating engine, client, algorithm, and optionally the model details
|
|
1106
|
-
given the model name.
|
|
1107
|
-
|
|
1108
|
-
PARAMETERS:
|
|
1109
|
-
name:
|
|
1110
|
-
Required Argument.
|
|
1111
|
-
Specifies the name of the model to retrieve the model attributes and output information for.
|
|
1112
|
-
Types: str
|
|
1113
|
-
|
|
1114
|
-
return_details:
|
|
1115
|
-
Optional Argument.
|
|
1116
|
-
Specifies whether to also return the row from the ModelDetailsV corresponding to the model.
|
|
1117
|
-
Types: bool
|
|
1118
|
-
Default Value: False
|
|
1119
|
-
|
|
1120
|
-
RETURNS:
|
|
1121
|
-
A tuple containing:
|
|
1122
|
-
* the name of the client that was used to generate the model,
|
|
1123
|
-
* the name of the engine that generated the model, and
|
|
1124
|
-
* the name of the algorithm used to generate the model.
|
|
1125
|
-
* If return_details=True, then additionally, the ModelDetailsV row related to the model.
|
|
1126
|
-
|
|
1127
|
-
EXAMPLES:
|
|
1128
|
-
>>> model_client, model_engine, algorithm = __retrieve_model_client_engine_algorithm(name)
|
|
1129
|
-
"""
|
|
1130
|
-
|
|
1131
|
-
model_details = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_DETAILS.value))
|
|
1132
|
-
model_details = model_details[model_details[mac.MODEL_DERIVED_NAME.value] == name]
|
|
1133
|
-
model_algorithm = model_details.select([mac.MODEL_DERIVED_ALGORITHM.value]).squeeze().lower()
|
|
1134
|
-
|
|
1135
|
-
model_client_and_eng = model_details.select([mac.MODEL_DERIVED_GENCLIENT.value,
|
|
1136
|
-
mac.MODEL_DERIVED_GENENG.value]).squeeze()
|
|
1137
|
-
|
|
1138
|
-
model_client = model_client_and_eng.select([mac.MODEL_DERIVED_GENCLIENT.value]).squeeze()
|
|
1139
|
-
model_engine = model_client_and_eng.select([mac.MODEL_DERIVED_GENENG.value]).squeeze()
|
|
1140
|
-
|
|
1141
|
-
if return_details:
|
|
1142
|
-
return model_client, model_engine, model_algorithm, model_details
|
|
1143
|
-
else:
|
|
1144
|
-
return model_client, model_engine, model_algorithm
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
def __retrieve_model_attributes(name, model_client, function_arg_map):
|
|
1148
|
-
"""
|
|
1149
|
-
DESCRIPTION:
|
|
1150
|
-
Internal function to get the the attributes used for generating model given it's name.
|
|
1151
|
-
|
|
1152
|
-
PARAMETERS:
|
|
1153
|
-
name:
|
|
1154
|
-
Required Argument.
|
|
1155
|
-
Specifies the name of the model to retrieve the model attributes and output information for.
|
|
1156
|
-
Types: str
|
|
1157
|
-
|
|
1158
|
-
model_client:
|
|
1159
|
-
Required Argument.
|
|
1160
|
-
Specified the name of the engine that generated the model.
|
|
1161
|
-
Types: str
|
|
1162
|
-
|
|
1163
|
-
function_arg_map:
|
|
1164
|
-
Required Argument.
|
|
1165
|
-
Specifies the teradataml-sql map for the function obtained using function_argument_mapper.
|
|
1166
|
-
Types: dict
|
|
1167
|
-
|
|
1168
|
-
RETURNS:
|
|
1169
|
-
A tuple of dictionaries:
|
|
1170
|
-
* the first one containing the attribute names and their values, and
|
|
1171
|
-
* the second one containing the formula related properties and their values, if the model saving client was
|
|
1172
|
-
teradataml.
|
|
1173
|
-
|
|
1174
|
-
EXAMPLES:
|
|
1175
|
-
>>> model_parameters, formula_related_params = __retrieve_model_attributes(name, model_client, function_arg_map)
|
|
1176
|
-
"""
|
|
1177
|
-
# Create DF on top of ModelAttributesV view and
|
|
1178
|
-
# 1. get only rows related to the model named 'name'.
|
|
1179
|
-
model_arguments = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_ATTRS.value))
|
|
1180
|
-
model_arguments = model_arguments[model_arguments[mac.MODEL_DERIVED_NAME.value] == name]
|
|
1181
|
-
|
|
1182
|
-
if model_client == mac.MODEL_TDML.value:
|
|
1183
|
-
attr_name_to_use = mac.MODEL_ATTR_CLIENT_NAME.value
|
|
1184
|
-
else:
|
|
1185
|
-
attr_name_to_use = mac.MODEL_ATTR_NAME.value
|
|
1186
|
-
|
|
1187
|
-
if model_client != mac.MODEL_TDML.value:
|
|
1188
|
-
# 2. Filter out the the row where "AttributeName" is not like __nonsql_argument_
|
|
1189
|
-
model_arguments = model_arguments.assign(notSqlonly=model_arguments.AttributeName.str.contains('__nonsql_argument_'))
|
|
1190
|
-
model_arguments = model_arguments[model_arguments.notSqlonly == 0]
|
|
1191
|
-
else:
|
|
1192
|
-
# 2. Filter out the the row where "ClientSpecificAttributeName" is not NULL
|
|
1193
|
-
model_arguments = model_arguments[model_arguments[attr_name_to_use] != None]
|
|
1194
|
-
|
|
1195
|
-
# Make sure the non-lazy view exists before SQLAlchemy construct can be used
|
|
1196
|
-
if model_arguments._table_name is None:
|
|
1197
|
-
model_arguments._table_name = df_utils._execute_node_return_db_object_name(model_arguments._nodeid,
|
|
1198
|
-
model_arguments._metaexpr)
|
|
1199
|
-
|
|
1200
|
-
# Since lengthier arguments can be a clob column, casting the smaller to clob
|
|
1201
|
-
# to select one of the two as applicable without values being truncated.
|
|
1202
|
-
select_expression = [model_arguments[attr_name_to_use].expression.label("AttrName"),
|
|
1203
|
-
case_when([(model_arguments[mac.MODEL_ATTR_VALUE.value].expression == None,
|
|
1204
|
-
model_arguments[mac.MODEL_ATTR_VALUEC.value].expression)],
|
|
1205
|
-
else_=func.cast(model_arguments[mac.MODEL_ATTR_VALUE.value].expression,
|
|
1206
|
-
type_=CLOB)).expression.label("AttrValue")]
|
|
1207
|
-
|
|
1208
|
-
# Get the final list of AttNames (Client/SQL) and their values (CLOB type)
|
|
1209
|
-
final_list = DataFrame.from_query(str(select(select_expression).compile(compile_kwargs={"literal_binds": True})))
|
|
1210
|
-
|
|
1211
|
-
# Model Parameters
|
|
1212
|
-
final_list = final_list[final_list["AttrName"] != mac.MODEL_CLIENT_CLASS_KEY.value]
|
|
1213
|
-
params = final_list.to_pandas().to_dict()
|
|
1214
|
-
model_parameters = {}
|
|
1215
|
-
formula_related_params = {}
|
|
1216
|
-
|
|
1217
|
-
index_len = len(params["AttrName"])
|
|
1218
|
-
if model_client == mac.MODEL_TDML.value:
|
|
1219
|
-
for i in range(index_len):
|
|
1220
|
-
# Check if the arguments are related to formula
|
|
1221
|
-
if params["AttrName"][i] == '__all_columns':
|
|
1222
|
-
formula_related_params['__all_columns'] = __cast_arg_values_to_tdml_types(params["AttrValue"][i],
|
|
1223
|
-
(str, list))
|
|
1224
|
-
elif params["AttrName"][i] == '__numeric_columns':
|
|
1225
|
-
formula_related_params['__numeric_columns'] = __cast_arg_values_to_tdml_types(params["AttrValue"][i],
|
|
1226
|
-
(str, list))
|
|
1227
|
-
elif params["AttrName"][i] == '__categorical_columns':
|
|
1228
|
-
formula_related_params['__categorical_columns'] = __cast_arg_values_to_tdml_types(params["AttrValue"]
|
|
1229
|
-
[i], (str, list))
|
|
1230
|
-
elif params["AttrName"][i] == '__response_column':
|
|
1231
|
-
formula_related_params['__response_column'] = __cast_arg_values_to_tdml_types(params["AttrValue"][i],
|
|
1232
|
-
str)
|
|
1233
|
-
else:
|
|
1234
|
-
tdml_type = __get_tdml_type_for_tdml_arg(params["AttrName"][i], function_arg_map)
|
|
1235
|
-
# tdml_type can be None when we do not have information about the argument in
|
|
1236
|
-
# the function argument mapper. Let's ignore it in the retrieval.
|
|
1237
|
-
if tdml_type is not None:
|
|
1238
|
-
model_parameters[params["AttrName"][i]] = __cast_arg_values_to_tdml_types(params["AttrValue"][i],
|
|
1239
|
-
tdml_type)
|
|
1240
|
-
else:
|
|
1241
|
-
formula_args = None
|
|
1242
|
-
for i in range(index_len):
|
|
1243
|
-
model_param_name = __get_arg_tdml_name_from_sql(function_arg_map,
|
|
1244
|
-
arg_type=famc.ARGUMENTS.value,
|
|
1245
|
-
name=params["AttrName"][i].lower())
|
|
1246
|
-
|
|
1247
|
-
attr_value = params["AttrValue"][i]
|
|
1248
|
-
|
|
1249
|
-
special_use, used_in = __check_if_client_specific_use(params["AttrName"][i].lower(),
|
|
1250
|
-
function_arg_map, is_sql_name=True)
|
|
1251
|
-
if special_use:
|
|
1252
|
-
if used_in == famc.USED_IN_FORMULA.value:
|
|
1253
|
-
# Get formula
|
|
1254
|
-
if formula_args is None:
|
|
1255
|
-
formula_args = __get_all_formula_related_args(function_arg_map)
|
|
1256
|
-
formula_args[params["AttrName"][i].lower()]['arg_value'] = attr_value
|
|
1257
|
-
else:
|
|
1258
|
-
# Get dictionary of sequence_column arguments
|
|
1259
|
-
sequence_by = __get_tdml_parameter_value_for_sequence(function_arg_map, attr_value)
|
|
1260
|
-
if sequence_by:
|
|
1261
|
-
for seq_key in sequence_by:
|
|
1262
|
-
model_parameters[seq_key] = sequence_by[seq_key]
|
|
1263
|
-
else:
|
|
1264
|
-
# tdml_name can be None when we do not have information about the SQL argument in
|
|
1265
|
-
# the function argument mapper. Let's ignore it in the retrieval.
|
|
1266
|
-
if model_param_name is None:
|
|
1267
|
-
warnings.warn(Messages.get_message(MessageCodes.CANNOT_TRANSLATE_TO_TDML_NAME,
|
|
1268
|
-
params["AttrName"][i]))
|
|
1269
|
-
continue
|
|
1270
|
-
model_param_type = model_param_name[famc.TDML_TYPE.value]
|
|
1271
|
-
model_param_name = model_param_name[famc.TDML_NAME.value]
|
|
1272
|
-
model_parameters[model_param_name] = __cast_arg_values_to_tdml_types(attr_value,
|
|
1273
|
-
model_param_type)
|
|
1274
|
-
|
|
1275
|
-
if formula_args is not None:
|
|
1276
|
-
formula = __get_tdml_parameter_value_for_formula(formula_args, __get_target_column(name))
|
|
1277
|
-
model_parameters[famc.TDML_FORMULA_NAME.value] = formula
|
|
1278
|
-
|
|
1279
|
-
return model_parameters, formula_related_params
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
def __retrieve_model_outputs(name, model_client, function_arg_map):
|
|
1283
|
-
"""
|
|
1284
|
-
DESCRIPTION:
|
|
1285
|
-
Internal function to get the output DataFrames corresponding to a saved model given it's name.
|
|
1286
|
-
|
|
1287
|
-
PARAMETERS:
|
|
1288
|
-
name:
|
|
1289
|
-
Required Argument.
|
|
1290
|
-
Specifies the name of the model to retrieve the model output information for.
|
|
1291
|
-
Types: str
|
|
1292
|
-
|
|
1293
|
-
model_client:
|
|
1294
|
-
Required Argument.
|
|
1295
|
-
Specifies the name of the client that generated the model.
|
|
1296
|
-
Types: str
|
|
1297
|
-
|
|
1298
|
-
function_arg_map:
|
|
1299
|
-
Required Argument.
|
|
1300
|
-
Specifies the teradataml-sql map for the function obtained using function_argument_mapper.
|
|
1301
|
-
Types: dict
|
|
1302
|
-
|
|
1303
|
-
RAISES:
|
|
1304
|
-
TeradataMlException
|
|
1305
|
-
|
|
1306
|
-
RETURNS:
|
|
1307
|
-
A Pandas DataFrame with the teradataml specific name for the output, and the underlying
|
|
1308
|
-
table names corresponding to them.
|
|
1309
|
-
|
|
1310
|
-
EXAMPLES:
|
|
1311
|
-
>>> output_df = __retrieve_model_outputs(name, model_client, function_arg_map)
|
|
1312
|
-
"""
|
|
1313
|
-
# Let's also get the output table map
|
|
1314
|
-
model_outputs = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_OBJECTS.value))
|
|
1315
|
-
model_outputs = model_outputs[model_outputs[mac.MODEL_DERIVED_NAME.value] == name]
|
|
1316
|
-
if model_client == mac.MODEL_TDML.value:
|
|
1317
|
-
model_outputs = model_outputs.assign(drop_columns=True,
|
|
1318
|
-
OutputName=model_outputs[mac.MODEL_OBJ_CLIENT_NAME.value],
|
|
1319
|
-
OutputTableName=model_outputs[mac.MODEL_OBJ_TABLE_NAME.value]).to_pandas()
|
|
1320
|
-
else:
|
|
1321
|
-
model_outputs = model_outputs.assign(drop_columns=True,
|
|
1322
|
-
OutputName=model_outputs[mac.MODEL_OBJ_NAME.value],
|
|
1323
|
-
OutputTableName=model_outputs[mac.MODEL_OBJ_TABLE_NAME.value]).to_pandas()
|
|
1324
|
-
output_names = []
|
|
1325
|
-
output_table_names = []
|
|
1326
|
-
index_len = len(model_outputs["OutputName"])
|
|
1327
|
-
for i in range(index_len):
|
|
1328
|
-
output_name = __get_arg_tdml_name_from_sql(function_arg_map, famc.OUTPUTS.value,
|
|
1329
|
-
model_outputs["OutputName"][i].lower())
|
|
1330
|
-
# We raise an exception when we are not able to get the teradataml name
|
|
1331
|
-
# for the SQL name of the output table.
|
|
1332
|
-
if output_name is None:
|
|
1333
|
-
raise TeradataMlException(Messages.get_message(MessageCodes.CANNOT_TRANSLATE_TO_TDML_NAME),
|
|
1334
|
-
MessageCodes.CANNOT_TRANSLATE_TO_TDML_NAME)
|
|
1335
|
-
output_names.append(output_name)
|
|
1336
|
-
output_table_names.append(model_outputs["OutputTableName"][i])
|
|
1337
|
-
model_outputs = pd.DataFrame({'OutputName': output_names, 'OutputTableName': output_table_names})
|
|
1338
|
-
|
|
1339
|
-
return model_outputs
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
def __retrieve_model_inputs(name, model_client, function_arg_map):
    """
    DESCRIPTION:
        Internal function to get the input table information corresponding to a saved model
        given its name.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the model to retrieve the model input information for.
            Types: str

        model_client:
            Required Argument.
            Specifies the name of the client that generated the model.
            Types: str

        function_arg_map:
            Required Argument.
            Specifies the teradataml-sql map for the function obtained using function_argument_mapper.
            Types: dict

    RETURNS:
        A dict mapping the teradataml specific name for the input to a dict describing that input.
        The dictionary is of the following form:
            {
                <tdml_input_name> :
                    {
                        <mac.MODEL_OBJ_TABLE_NAME.value> : <actual_table_name>,
                        <mac.MODEL_INPUT_NROWS.value> : <number of rows>,
                        <mac.MODEL_INPUT_NCOLS.value> : <number of columns>
                    }
            }
        Inputs whose SQL name cannot be translated to a teradataml name (a warning is issued),
        and inputs with no associated table name, are left out of the result.

    EXAMPLES:
        >>> input_info = __retrieve_model_inputs(name, model_client, function_arg_map)
    """
    # First get the model_id.
    models = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS.value))
    model_id = models[models[mac.MODEL_NAME.value] == name].select([mac.MODEL_ID.value]).squeeze().item()

    # Now find the inputs related to the model.
    training_data = DataFrame(in_schema(mac.MODEL_CATALOG_DB.value, mac.MODELS_INPUTSX.value))
    training_data = training_data[training_data[mac.MODEL_ID.value] == model_id]

    # One dict per catalog row. Iterating records (instead of indexing the column-oriented
    # dict with 0..n-1) does not depend on how the pandas index happens to be labelled.
    input_rows = training_data.to_pandas().to_dict(orient='records')

    # The client check does not change from row to row, so evaluate it once.
    is_tdml_client = model_client == mac.MODEL_TDML.value

    model_inputs = {}
    for row in input_rows:
        if is_tdml_client:
            # Models saved by teradataml already carry the teradataml name of the input.
            input_name = row[mac.MODEL_INPUT_CLIENT_NAME.value]
        else:
            sql_input_name = row[mac.MODEL_INPUT_NAME.value]
            input_name = __get_arg_tdml_name_from_sql(function_arg_map, famc.INPUTS.value,
                                                      sql_input_name.lower())
            # if input_name is None then we have been unable to get the tdml name from the SQL name
            # for the input. In this case, we ignore the input initialization and continue.
            if input_name is None:
                warnings.warn(Messages.get_message(MessageCodes.CANNOT_TRANSLATE_TO_TDML_NAME,
                                                   sql_input_name))
                continue

        # No need for further processing if the TableName associated with an input is None.
        table_name = row[mac.MODEL_INPUT_TABLE_NAME.value]
        if table_name is None:
            continue

        model_inputs[input_name] = {
            mac.MODEL_OBJ_TABLE_NAME.value: table_name,
            mac.MODEL_INPUT_NROWS.value: row[mac.MODEL_INPUT_NROWS.value],
            mac.MODEL_INPUT_NCOLS.value: row[mac.MODEL_INPUT_NCOLS.value],
        }

    return model_inputs
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
def __retrieve_argument_and_output_map(name):
    """
    DESCRIPTION:
        Internal function to get the teradataml function class corresponding to the model to retrieve,
        along with the attributes and output objects to initialize the model with.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the model to retrieve the model attributes and output information for.
            Types: str

    RETURNS:
        A tuple with the following elements:
            * the function class to initialize for the model,
            * the model generating engine to help with the initialization, and
            * the dictionary containing the attributes and their values including output table objects.
              It is built by merging, in this order, the input DataFrames, the model parameters
              (including formula related arguments) and the output table names; on a key clash
              the later group wins.

    RAISES:
        Whatever the helper functions called here raise. A failure to create a DataFrame on an
        input table is not raised: a warning is issued and that input is set to None.

    EXAMPLES:
        >>> model_class, model_engine, attribute_dictionary = __retrieve_argument_and_output_map(name)
    """
    # First, let's get the model engine, client, algorithm, and other details.
    # NOTE(review): the second positional argument (True) presumably asks the helper to also
    # return the model details - confirm against the helper's definition.
    model_client, model_engine, model_algorithm, model_details = \
        __retrieve_model_client_engine_algorithm(name, True)

    # Get the build_time, algorithm_name/model_class, target_column, prediction_type to be returned
    # later as parameters. model_algorithm is also used to figure out the Python class to be instantiated.
    build_time = model_details.select([mac.MODEL_DERIVED_BUILD_TIME.value]).squeeze()
    prediction_type = model_details.select([mac.MODEL_DERIVED_PREDICTION_TYPE.value]).squeeze()
    target_column = model_details.select([mac.MODEL_DERIVED_TARGET_COLUMN.value]).squeeze()

    # Get the teradataml model class corresponding to the model.
    function_arg_map = _argument_mapper._get_function_map(engine=model_engine,
                                                          function_name=model_algorithm.lower())
    model_class = __retrieve_model_class(name, model_client, function_arg_map)

    # Get the model attributes and formula related arguments.
    model_parameters, formula_related_args = __retrieve_model_attributes(name, model_client, function_arg_map)

    # Also append the algorithm_name, build_time, target_column, and prediction_type for the function.
    model_parameters['__algorithm_name'] = model_algorithm
    if build_time is not None:
        # NOTE(review): only build_time is unwrapped with .item(); target_column and
        # prediction_type are stored as returned by squeeze() - kept as in the original.
        model_parameters['__build_time'] = build_time.item()
    if target_column is not None:
        model_parameters['__target_column'] = target_column
    if prediction_type is not None:
        model_parameters['__prediction_type'] = prediction_type

    # Merge the formula related arguments.
    model_parameters = {**model_parameters, **formula_related_args}

    # Try plugging in the input DataFrames as well.
    input_details = __retrieve_model_inputs(name, model_client, function_arg_map)

    # The identifier preparer does not depend on the input being processed, so build it once.
    tdp = preparer(td_dialect)

    model_inputs = {}
    for input_name, details in input_details.items():
        table_name = details[mac.MODEL_OBJ_TABLE_NAME.value]
        sname = UtilFuncs._extract_db_name(table_name)
        tname = UtilFuncs._extract_table_name(table_name)

        # Add quotes around the DB and Table names if necessary.
        if sname is not None:
            sname = tdp.quote(UtilFuncs._teradata_unquote_arg(sname, quote='"'))
        if tname is not None:
            tname = tdp.quote(UtilFuncs._teradata_unquote_arg(tname, quote='"'))

        # Try creating the input DataFrames.
        try:
            if sname is None:
                input_df = DataFrame(tname)
            else:
                input_df = DataFrame(in_schema(sname, tname))
        except Exception:
            # We are most likely not able to create a DataFrame on the input as the input may
            # no longer be existent. This is deliberately best-effort: warn and initialize
            # the input to None instead of failing the whole retrieval.
            warnings.warn("Unable to fetch input details for the '{}' argument "
                          "from underlying object named '{}'".format(input_name, table_name))
            input_df = None

        model_inputs[input_name] = input_df

    # Let's also get the output table map: teradataml output name -> underlying table name.
    model_outputs = __retrieve_model_outputs(name, model_client, function_arg_map)
    model_tables = dict(zip(model_outputs["OutputName"], model_outputs["OutputTableName"]))

    return model_class, model_engine, {**model_inputs, **model_parameters, **model_tables}
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
491
|
from teradataml.dataframe.dataframe import DataFrame, in_schema
|