teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +238 -1
- teradataml/__init__.py +13 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/Transformations.py +4 -4
- teradataml/analytics/__init__.py +0 -2
- teradataml/analytics/analytic_function_executor.py +3 -0
- teradataml/analytics/json_parser/utils.py +13 -12
- teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
- teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
- teradataml/analytics/sqle/__init__.py +0 -13
- teradataml/analytics/utils.py +1 -0
- teradataml/analytics/valib.py +3 -0
- teradataml/automl/__init__.py +1628 -0
- teradataml/automl/custom_json_utils.py +1270 -0
- teradataml/automl/data_preparation.py +993 -0
- teradataml/automl/data_transformation.py +727 -0
- teradataml/automl/feature_engineering.py +1648 -0
- teradataml/automl/feature_exploration.py +547 -0
- teradataml/automl/model_evaluation.py +163 -0
- teradataml/automl/model_training.py +887 -0
- teradataml/catalog/__init__.py +0 -2
- teradataml/catalog/byom.py +49 -6
- teradataml/catalog/function_argument_mapper.py +0 -2
- teradataml/catalog/model_cataloging_utils.py +2 -1021
- teradataml/common/aed_utils.py +6 -2
- teradataml/common/constants.py +50 -58
- teradataml/common/deprecations.py +160 -0
- teradataml/common/garbagecollector.py +61 -104
- teradataml/common/messagecodes.py +27 -36
- teradataml/common/messages.py +11 -15
- teradataml/common/utils.py +205 -287
- teradataml/common/wrapper_utils.py +1 -110
- teradataml/context/context.py +150 -78
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
- teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
- teradataml/data/fish.csv +160 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/insurance.csv +1 -1
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
- teradataml/data/load_example_data.py +3 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/openml_example.json +63 -0
- teradataml/data/scripts/deploy_script.py +65 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
- teradataml/data/templates/open_source_ml.json +9 -0
- teradataml/data/teradataml_example.json +73 -1
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/dataframe/copy_to.py +79 -13
- teradataml/dataframe/data_transfer.py +8 -0
- teradataml/dataframe/dataframe.py +910 -311
- teradataml/dataframe/dataframe_utils.py +102 -5
- teradataml/dataframe/fastload.py +11 -3
- teradataml/dataframe/setop.py +15 -2
- teradataml/dataframe/sql.py +3735 -77
- teradataml/dataframe/sql_function_parameters.py +56 -5
- teradataml/dataframe/vantage_function_types.py +45 -1
- teradataml/dataframe/window.py +30 -29
- teradataml/dbutils/dbutils.py +18 -1
- teradataml/geospatial/geodataframe.py +18 -7
- teradataml/geospatial/geodataframecolumn.py +5 -0
- teradataml/hyperparameter_tuner/optimizer.py +910 -120
- teradataml/hyperparameter_tuner/utils.py +131 -37
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/sklearn/__init__.py +1 -0
- teradataml/opensource/sklearn/_class.py +255 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
- teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
- teradataml/opensource/sklearn/constants.py +54 -0
- teradataml/options/__init__.py +3 -6
- teradataml/options/configure.py +21 -20
- teradataml/scriptmgmt/UserEnv.py +61 -5
- teradataml/scriptmgmt/lls_utils.py +135 -53
- teradataml/table_operators/Apply.py +38 -6
- teradataml/table_operators/Script.py +45 -308
- teradataml/table_operators/TableOperator.py +182 -591
- teradataml/table_operators/__init__.py +0 -1
- teradataml/table_operators/table_operator_util.py +32 -40
- teradataml/utils/validators.py +127 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
- teradataml/analytics/mle/AdaBoost.py +0 -651
- teradataml/analytics/mle/AdaBoostPredict.py +0 -564
- teradataml/analytics/mle/Antiselect.py +0 -342
- teradataml/analytics/mle/Arima.py +0 -641
- teradataml/analytics/mle/ArimaPredict.py +0 -477
- teradataml/analytics/mle/Attribution.py +0 -1070
- teradataml/analytics/mle/Betweenness.py +0 -658
- teradataml/analytics/mle/Burst.py +0 -711
- teradataml/analytics/mle/CCM.py +0 -600
- teradataml/analytics/mle/CCMPrepare.py +0 -324
- teradataml/analytics/mle/CFilter.py +0 -460
- teradataml/analytics/mle/ChangePointDetection.py +0 -572
- teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
- teradataml/analytics/mle/Closeness.py +0 -737
- teradataml/analytics/mle/ConfusionMatrix.py +0 -420
- teradataml/analytics/mle/Correlation.py +0 -477
- teradataml/analytics/mle/Correlation2.py +0 -573
- teradataml/analytics/mle/CoxHazardRatio.py +0 -679
- teradataml/analytics/mle/CoxPH.py +0 -556
- teradataml/analytics/mle/CoxSurvival.py +0 -478
- teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
- teradataml/analytics/mle/DTW.py +0 -623
- teradataml/analytics/mle/DWT.py +0 -564
- teradataml/analytics/mle/DWT2D.py +0 -599
- teradataml/analytics/mle/DecisionForest.py +0 -716
- teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
- teradataml/analytics/mle/DecisionForestPredict.py +0 -561
- teradataml/analytics/mle/DecisionTree.py +0 -830
- teradataml/analytics/mle/DecisionTreePredict.py +0 -528
- teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
- teradataml/analytics/mle/FMeasure.py +0 -402
- teradataml/analytics/mle/FPGrowth.py +0 -734
- teradataml/analytics/mle/FrequentPaths.py +0 -695
- teradataml/analytics/mle/GLM.py +0 -558
- teradataml/analytics/mle/GLML1L2.py +0 -547
- teradataml/analytics/mle/GLML1L2Predict.py +0 -519
- teradataml/analytics/mle/GLMPredict.py +0 -529
- teradataml/analytics/mle/HMMDecoder.py +0 -945
- teradataml/analytics/mle/HMMEvaluator.py +0 -901
- teradataml/analytics/mle/HMMSupervised.py +0 -521
- teradataml/analytics/mle/HMMUnsupervised.py +0 -572
- teradataml/analytics/mle/Histogram.py +0 -561
- teradataml/analytics/mle/IDWT.py +0 -476
- teradataml/analytics/mle/IDWT2D.py +0 -493
- teradataml/analytics/mle/IdentityMatch.py +0 -763
- teradataml/analytics/mle/Interpolator.py +0 -918
- teradataml/analytics/mle/KMeans.py +0 -485
- teradataml/analytics/mle/KNN.py +0 -627
- teradataml/analytics/mle/KNNRecommender.py +0 -488
- teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
- teradataml/analytics/mle/LAR.py +0 -439
- teradataml/analytics/mle/LARPredict.py +0 -478
- teradataml/analytics/mle/LDA.py +0 -548
- teradataml/analytics/mle/LDAInference.py +0 -492
- teradataml/analytics/mle/LDATopicSummary.py +0 -464
- teradataml/analytics/mle/LevenshteinDistance.py +0 -450
- teradataml/analytics/mle/LinReg.py +0 -433
- teradataml/analytics/mle/LinRegPredict.py +0 -438
- teradataml/analytics/mle/MinHash.py +0 -544
- teradataml/analytics/mle/Modularity.py +0 -587
- teradataml/analytics/mle/NEREvaluator.py +0 -410
- teradataml/analytics/mle/NERExtractor.py +0 -595
- teradataml/analytics/mle/NERTrainer.py +0 -458
- teradataml/analytics/mle/NGrams.py +0 -570
- teradataml/analytics/mle/NPath.py +0 -634
- teradataml/analytics/mle/NTree.py +0 -549
- teradataml/analytics/mle/NaiveBayes.py +0 -462
- teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
- teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
- teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
- teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
- teradataml/analytics/mle/NamedEntityFinder.py +0 -529
- teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
- teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
- teradataml/analytics/mle/POSTagger.py +0 -417
- teradataml/analytics/mle/Pack.py +0 -411
- teradataml/analytics/mle/PageRank.py +0 -535
- teradataml/analytics/mle/PathAnalyzer.py +0 -426
- teradataml/analytics/mle/PathGenerator.py +0 -367
- teradataml/analytics/mle/PathStart.py +0 -464
- teradataml/analytics/mle/PathSummarizer.py +0 -470
- teradataml/analytics/mle/Pivot.py +0 -471
- teradataml/analytics/mle/ROC.py +0 -425
- teradataml/analytics/mle/RandomSample.py +0 -637
- teradataml/analytics/mle/RandomWalkSample.py +0 -490
- teradataml/analytics/mle/SAX.py +0 -779
- teradataml/analytics/mle/SVMDense.py +0 -677
- teradataml/analytics/mle/SVMDensePredict.py +0 -536
- teradataml/analytics/mle/SVMDenseSummary.py +0 -437
- teradataml/analytics/mle/SVMSparse.py +0 -557
- teradataml/analytics/mle/SVMSparsePredict.py +0 -553
- teradataml/analytics/mle/SVMSparseSummary.py +0 -435
- teradataml/analytics/mle/Sampling.py +0 -549
- teradataml/analytics/mle/Scale.py +0 -565
- teradataml/analytics/mle/ScaleByPartition.py +0 -496
- teradataml/analytics/mle/ScaleMap.py +0 -378
- teradataml/analytics/mle/ScaleSummary.py +0 -320
- teradataml/analytics/mle/SentenceExtractor.py +0 -363
- teradataml/analytics/mle/SentimentEvaluator.py +0 -432
- teradataml/analytics/mle/SentimentExtractor.py +0 -578
- teradataml/analytics/mle/SentimentTrainer.py +0 -405
- teradataml/analytics/mle/SeriesSplitter.py +0 -641
- teradataml/analytics/mle/Sessionize.py +0 -475
- teradataml/analytics/mle/SimpleMovAvg.py +0 -397
- teradataml/analytics/mle/StringSimilarity.py +0 -425
- teradataml/analytics/mle/TF.py +0 -389
- teradataml/analytics/mle/TFIDF.py +0 -504
- teradataml/analytics/mle/TextChunker.py +0 -414
- teradataml/analytics/mle/TextClassifier.py +0 -399
- teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
- teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
- teradataml/analytics/mle/TextMorph.py +0 -494
- teradataml/analytics/mle/TextParser.py +0 -623
- teradataml/analytics/mle/TextTagger.py +0 -530
- teradataml/analytics/mle/TextTokenizer.py +0 -502
- teradataml/analytics/mle/UnivariateStatistics.py +0 -488
- teradataml/analytics/mle/Unpack.py +0 -526
- teradataml/analytics/mle/Unpivot.py +0 -438
- teradataml/analytics/mle/VarMax.py +0 -776
- teradataml/analytics/mle/VectorDistance.py +0 -762
- teradataml/analytics/mle/WeightedMovAvg.py +0 -400
- teradataml/analytics/mle/XGBoost.py +0 -842
- teradataml/analytics/mle/XGBoostPredict.py +0 -627
- teradataml/analytics/mle/__init__.py +0 -123
- teradataml/analytics/mle/json/adaboost_mle.json +0 -135
- teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
- teradataml/analytics/mle/json/antiselect_mle.json +0 -34
- teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
- teradataml/analytics/mle/json/arima_mle.json +0 -172
- teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
- teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
- teradataml/analytics/mle/json/betweenness_mle.json +0 -97
- teradataml/analytics/mle/json/burst_mle.json +0 -140
- teradataml/analytics/mle/json/ccm_mle.json +0 -124
- teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
- teradataml/analytics/mle/json/cfilter_mle.json +0 -93
- teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
- teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
- teradataml/analytics/mle/json/closeness_mle.json +0 -104
- teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
- teradataml/analytics/mle/json/correlation_mle.json +0 -86
- teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
- teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
- teradataml/analytics/mle/json/coxph_mle.json +0 -98
- teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
- teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
- teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
- teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
- teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
- teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
- teradataml/analytics/mle/json/dtw_mle.json +0 -97
- teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
- teradataml/analytics/mle/json/dwt_mle.json +0 -101
- teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
- teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
- teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
- teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
- teradataml/analytics/mle/json/glm_mle.json +0 -111
- teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
- teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
- teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/histogram_mle.json +0 -100
- teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
- teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
- teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
- teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
- teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
- teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
- teradataml/analytics/mle/json/idwt_mle.json +0 -66
- teradataml/analytics/mle/json/interpolator_mle.json +0 -151
- teradataml/analytics/mle/json/kmeans_mle.json +0 -97
- teradataml/analytics/mle/json/knn_mle.json +0 -141
- teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
- teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
- teradataml/analytics/mle/json/lar_mle.json +0 -78
- teradataml/analytics/mle/json/larpredict_mle.json +0 -69
- teradataml/analytics/mle/json/lda_mle.json +0 -130
- teradataml/analytics/mle/json/ldainference_mle.json +0 -78
- teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
- teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
- teradataml/analytics/mle/json/linreg_mle.json +0 -42
- teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
- teradataml/analytics/mle/json/minhash_mle.json +0 -113
- teradataml/analytics/mle/json/modularity_mle.json +0 -91
- teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
- teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
- teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
- teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
- teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
- teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
- teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
- teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
- teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
- teradataml/analytics/mle/json/ngrams_mle.json +0 -137
- teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
- teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
- teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
- teradataml/analytics/mle/json/pack_mle.json +0 -58
- teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
- teradataml/analytics/mle/json/pagerank_mle.json +0 -81
- teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
- teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
- teradataml/analytics/mle/json/pathstart_mle.json +0 -62
- teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
- teradataml/analytics/mle/json/pivoting_mle.json +0 -71
- teradataml/analytics/mle/json/postagger_mle.json +0 -51
- teradataml/analytics/mle/json/randomsample_mle.json +0 -131
- teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
- teradataml/analytics/mle/json/roc_mle.json +0 -73
- teradataml/analytics/mle/json/sampling_mle.json +0 -75
- teradataml/analytics/mle/json/sax_mle.json +0 -154
- teradataml/analytics/mle/json/scale_mle.json +0 -93
- teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
- teradataml/analytics/mle/json/scalemap_mle.json +0 -44
- teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
- teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
- teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
- teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
- teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
- teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
- teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
- teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
- teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
- teradataml/analytics/mle/json/svmdense_mle.json +0 -165
- teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
- teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
- teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
- teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
- teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
- teradataml/analytics/mle/json/textchunker_mle.json +0 -40
- teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
- teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
- teradataml/analytics/mle/json/textmorph_mle.json +0 -63
- teradataml/analytics/mle/json/textparser_mle.json +0 -166
- teradataml/analytics/mle/json/texttagger_mle.json +0 -81
- teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
- teradataml/analytics/mle/json/tf_mle.json +0 -33
- teradataml/analytics/mle/json/tfidf_mle.json +0 -34
- teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
- teradataml/analytics/mle/json/unpack_mle.json +0 -91
- teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
- teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
- teradataml/analytics/mle/json/varmax_mle.json +0 -176
- teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
- teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
- teradataml/analytics/mle/json/xgboost_mle.json +0 -178
- teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
- teradataml/analytics/sqle/Antiselect.py +0 -321
- teradataml/analytics/sqle/Attribution.py +0 -603
- teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
- teradataml/analytics/sqle/GLMPredict.py +0 -430
- teradataml/analytics/sqle/MovingAverage.py +0 -543
- teradataml/analytics/sqle/NGramSplitter.py +0 -548
- teradataml/analytics/sqle/NPath.py +0 -632
- teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
- teradataml/analytics/sqle/Pack.py +0 -388
- teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
- teradataml/analytics/sqle/Sessionize.py +0 -390
- teradataml/analytics/sqle/StringSimilarity.py +0 -400
- teradataml/analytics/sqle/Unpack.py +0 -503
- teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
- teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
- teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
- teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
- teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
- teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
- teradataml/analytics/sqle/json/npath_sqle.json +0 -67
- teradataml/analytics/sqle/json/pack_sqle.json +0 -47
- teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
- teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
- teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
- teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
- teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
- teradataml/catalog/model_cataloging.py +0 -980
- teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
- teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
- teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
- teradataml/table_operators/sandbox_container_util.py +0 -643
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
|
@@ -13,6 +13,9 @@
|
|
|
13
13
|
# ##################################################################
|
|
14
14
|
|
|
15
15
|
import os
|
|
16
|
+
import time
|
|
17
|
+
import uuid
|
|
18
|
+
from math import floor
|
|
16
19
|
import tarfile
|
|
17
20
|
import subprocess
|
|
18
21
|
from pathlib import Path
|
|
@@ -495,177 +498,9 @@ class TableOperator:
|
|
|
495
498
|
awu_matrix_returns.append(["values in returns", self.returns[key], False, self._supported_returns_datatypes])
|
|
496
499
|
_Validators._validate_function_arguments(awu_matrix_returns)
|
|
497
500
|
|
|
498
|
-
def setup_test_env(self, docker_image_location):
|
|
499
|
-
"""
|
|
500
|
-
DESCRIPTION:
|
|
501
|
-
Function enables user to load already downloaded sandbox image.
|
|
502
|
-
This will enable users to run the Python scripts on client machine outside of
|
|
503
|
-
Open Analytics Framework.
|
|
504
|
-
|
|
505
|
-
PARAMETERS:
|
|
506
|
-
docker_image_location:
|
|
507
|
-
Required Argument.
|
|
508
|
-
Specifies the location of image on user's system.
|
|
509
|
-
Types: str
|
|
510
|
-
Note:
|
|
511
|
-
For location to download docker image refer teradataml User Guide.
|
|
512
|
-
|
|
513
|
-
RETURNS:
|
|
514
|
-
None.
|
|
515
|
-
|
|
516
|
-
RAISES:
|
|
517
|
-
TeradataMlException
|
|
518
|
-
|
|
519
|
-
EXAMPLES:
|
|
520
|
-
# Load example data.
|
|
521
|
-
load_example_data("Script", ["barrier"])
|
|
522
|
-
|
|
523
|
-
# Example - The script mapper.py reads in a line of text input ("Old Macdonald Had A Farm") from csv and
|
|
524
|
-
# splits the line into individual words, emitting a new row for each word.
|
|
525
|
-
|
|
526
|
-
# Create teradataml DataFrame objects.
|
|
527
|
-
>>> barrierdf = DataFrame.from_table("barrier")
|
|
528
|
-
|
|
529
|
-
# Create remote user environment.
|
|
530
|
-
>>> test_env = create_env('test_env', 'python_3.7.9', 'Demo environment');
|
|
531
|
-
User environment test_env created.
|
|
532
|
-
|
|
533
|
-
# Create an Apply object that allows user to execute script using Open Analytics Framework.
|
|
534
|
-
>>> apply_obj = Apply(data=barrierdf,
|
|
535
|
-
script_name='mapper.py',
|
|
536
|
-
files_local_path='data/scripts',
|
|
537
|
-
apply_command='python mapper.py',
|
|
538
|
-
delimiter=',',
|
|
539
|
-
env_name = "test_env",
|
|
540
|
-
data_partition_column="Id",
|
|
541
|
-
returns={"word": VARCHAR(15), "count_input": VARCHAR(2)}
|
|
542
|
-
)
|
|
543
|
-
|
|
544
|
-
# Run user script locally within docker container and using data from csv.
|
|
545
|
-
# This helps the user to fix script level issues outside of Open Analytics Framework.
|
|
546
|
-
# Setup the environment by providing local path to docker image file.
|
|
547
|
-
>>> apply_obj.setup_test_env(docker_image_location='/tmp/sto_sandbox_docker_image.tar'))
|
|
548
|
-
Loading image from /tmp/sto_sandbox_docker_image.tar. It may take few minutes.
|
|
549
|
-
Image loaded successfully.
|
|
550
|
-
"""
|
|
551
|
-
self.awu_matrix_setup=[]
|
|
552
|
-
self.awu_matrix_setup.append((["docker_image_location", docker_image_location, False, (str), True]))
|
|
553
|
-
|
|
554
|
-
# Validate missing arguments
|
|
555
|
-
_Validators._validate_missing_required_arguments(self.awu_matrix_setup)
|
|
556
|
-
|
|
557
|
-
# Validate argument types
|
|
558
|
-
_Validators._validate_function_arguments(self.awu_matrix_setup)
|
|
559
|
-
|
|
560
|
-
# get the frame object of the function.
|
|
561
|
-
import inspect
|
|
562
|
-
frame = inspect.currentframe()
|
|
563
|
-
|
|
564
|
-
# Validate argument types.
|
|
565
|
-
_Validators._validate_module_presence('docker', frame.f_code.co_name)
|
|
566
|
-
|
|
567
|
-
import docker
|
|
568
|
-
# Load image from user provided location
|
|
569
|
-
client = docker.from_env()
|
|
570
|
-
if not Path(docker_image_location).exists():
|
|
571
|
-
raise TeradataMlException(
|
|
572
|
-
Messages.get_message(MessageCodes.INPUT_FILE_NOT_FOUND).format(docker_image_location),
|
|
573
|
-
MessageCodes.INPUT_FILE_NOT_FOUND)
|
|
574
|
-
else:
|
|
575
|
-
try:
|
|
576
|
-
print("Loading image from {0}. It may take few minutes.".format(docker_image_location))
|
|
577
|
-
with open(docker_image_location, 'rb') as f:
|
|
578
|
-
client.images.load(f)
|
|
579
|
-
print("Image loaded successfully.")
|
|
580
|
-
except:
|
|
581
|
-
raise
|
|
582
|
-
|
|
583
|
-
# Set _latest_sandbox_exists to True - which indicates sandbox image for STO exists on the system
|
|
584
|
-
configure._latest_sandbox_exists = True
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
def setup_sto_env(self, docker_image_location):
|
|
588
|
-
"""
|
|
589
|
-
DESCRIPTION:
|
|
590
|
-
Function enables user to load already downloaded sandbox image.
|
|
591
|
-
|
|
592
|
-
PARAMETERS:
|
|
593
|
-
docker_image_location:
|
|
594
|
-
Required Argument.
|
|
595
|
-
Specifies the location of image on user's system.
|
|
596
|
-
Types: str
|
|
597
|
-
|
|
598
|
-
Note:
|
|
599
|
-
For location to download docker image refer teradataml User Guide.
|
|
600
|
-
|
|
601
|
-
RETURNS:
|
|
602
|
-
None.
|
|
603
|
-
|
|
604
|
-
RAISES:
|
|
605
|
-
TeradataMlException
|
|
606
|
-
|
|
607
|
-
EXAMPLES:
|
|
608
|
-
# Note - Refer to User Guide for setting search path and required permissions.
|
|
609
|
-
# Load example data.
|
|
610
|
-
load_example_data("Script", ["barrier"])
|
|
611
|
-
|
|
612
|
-
# Example - The script mapper.py reads in a line of text input
|
|
613
|
-
# ("Old Macdonald Had A Farm") from csv and
|
|
614
|
-
# splits the line into individual words, emitting a new row for each word.
|
|
615
|
-
|
|
616
|
-
# Create teradataml DataFrame objects.
|
|
617
|
-
>>> barrierdf = DataFrame.from_table("barrier")
|
|
618
|
-
|
|
619
|
-
# Set SEARCHUIFDBPATH.
|
|
620
|
-
>>> execute_sql("SET SESSION SEARCHUIFDBPATH = alice;")
|
|
621
|
-
|
|
622
|
-
# Create a Script object that allows us to execute script on Vantage.
|
|
623
|
-
>>> import os
|
|
624
|
-
>>> td_path = os.path.dirname(teradataml.__file__)
|
|
625
|
-
>>> from teradatasqlalchemy import VARCHAR
|
|
626
|
-
>>> sto = Script(data=barrierdf,
|
|
627
|
-
... script_name='mapper.py',
|
|
628
|
-
... files_local_path= os.path.join(td_path, 'data', 'scripts'),
|
|
629
|
-
... script_command='python ./alice/mapper.py',
|
|
630
|
-
... data_order_column="Id",
|
|
631
|
-
... is_local_order=False,
|
|
632
|
-
... nulls_first=False,
|
|
633
|
-
... sort_ascending=False,
|
|
634
|
-
... charset='latin',
|
|
635
|
-
... returns=OrderedDict([("word", VARCHAR(15)),("count_input", VARCHAR(2))]))
|
|
636
|
-
|
|
637
|
-
# Run user script locally within docker container and using data from csv.
|
|
638
|
-
# This helps the user to fix script level issues outside Vantage.
|
|
639
|
-
# Setup the environment by providing local path to docker image file.
|
|
640
|
-
>>> sto.setup_sto_env(docker_image_location='/tmp/sto_sandbox_docker_image.tar')
|
|
641
|
-
Loading image from /tmp/sto_sandbox_docker_image.tar. It may take few minutes.
|
|
642
|
-
Image loaded successfully.
|
|
643
|
-
Starting a container for stosandbox:1.0 image.
|
|
644
|
-
Container d7c73cb498c79a082180576bb5b10bb07b52efdd3026856146fc15e91147b19f
|
|
645
|
-
started successfully.
|
|
646
|
-
|
|
647
|
-
"""
|
|
648
|
-
self.awu_matrix_setup = []
|
|
649
|
-
self.awu_matrix_setup.append((["docker_image_location", docker_image_location,
|
|
650
|
-
False, (str), True]))
|
|
651
|
-
|
|
652
|
-
# Validate missing arguments.
|
|
653
|
-
_Validators._validate_missing_required_arguments(self.awu_matrix_setup)
|
|
654
|
-
|
|
655
|
-
# Validate argument types.
|
|
656
|
-
_Validators._validate_function_arguments(self.awu_matrix_setup)
|
|
657
|
-
|
|
658
|
-
from teradataml.table_operators.sandbox_container_util import setup_sandbox_env
|
|
659
|
-
setup_sandbox_env(sandbox_image_location=docker_image_location,
|
|
660
|
-
sandbox_image_name='stosandbox:1.0')
|
|
661
|
-
|
|
662
|
-
# Set _latest_sandbox_exists to True - which indicates sandbox image for STO
|
|
663
|
-
# exists on the system.
|
|
664
|
-
from teradataml.options.configure import configure
|
|
665
|
-
configure._latest_sandbox_exists = True
|
|
666
501
|
|
|
667
502
|
def test_script(self, supporting_files=None, input_data_file=None, script_args="",
|
|
668
|
-
exec_mode='
|
|
503
|
+
exec_mode='local', **kwargs):
|
|
669
504
|
"""
|
|
670
505
|
DESCRIPTION:
|
|
671
506
|
Function enables user to run script in docker container environment outside
|
|
@@ -697,10 +532,9 @@ class TableOperator:
|
|
|
697
532
|
exec_mode:
|
|
698
533
|
Optional Argument.
|
|
699
534
|
Specifies the mode in which user wants to test the script.
|
|
700
|
-
If set to '
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
Default Value: 'sandbox'
|
|
535
|
+
If set to 'local', the user script will run locally on user's system.
|
|
536
|
+
Permitted Values: 'local'
|
|
537
|
+
Default Value: 'local'
|
|
704
538
|
Types: str
|
|
705
539
|
|
|
706
540
|
kwargs:
|
|
@@ -734,13 +568,6 @@ class TableOperator:
|
|
|
734
568
|
Default Value: True
|
|
735
569
|
Types: bool
|
|
736
570
|
|
|
737
|
-
timeout:
|
|
738
|
-
Optional Argument.
|
|
739
|
-
Specifies the timeout for docker API calls when running in
|
|
740
|
-
sandbox mode.
|
|
741
|
-
Default Value: 5000
|
|
742
|
-
Types: int
|
|
743
|
-
|
|
744
571
|
data_file_quote_char:
|
|
745
572
|
Optional Argument.
|
|
746
573
|
Specifies the quotechar used in the input data file.
|
|
@@ -811,23 +638,6 @@ class TableOperator:
|
|
|
811
638
|
EXAMPLES:
|
|
812
639
|
# Assumption - sto is Script() object. Please refer to help(Script)
|
|
813
640
|
# for creating Script object.
|
|
814
|
-
# Run user script in sandbox mode with input from data file.
|
|
815
|
-
|
|
816
|
-
>>> sto.test_script(input_data_file='../barrier.csv',
|
|
817
|
-
... data_file_delimiter=',',
|
|
818
|
-
... data_file_quote_char='"',
|
|
819
|
-
... data_file_header=True,
|
|
820
|
-
... exec_mode='sandbox')
|
|
821
|
-
|
|
822
|
-
############ STDOUT Output ############
|
|
823
|
-
word count_input
|
|
824
|
-
0 1 1
|
|
825
|
-
1 Old 1
|
|
826
|
-
2 Macdonald 1
|
|
827
|
-
3 Had 1
|
|
828
|
-
4 A 1
|
|
829
|
-
5 Farm 1
|
|
830
|
-
>>>
|
|
831
641
|
|
|
832
642
|
# Run user script in local mode with input from table.
|
|
833
643
|
>>> sto.test_script(data_row_limit=300, password='alice', exec_mode='local')
|
|
@@ -841,19 +651,19 @@ class TableOperator:
|
|
|
841
651
|
4 A 1
|
|
842
652
|
5 Farm 1
|
|
843
653
|
|
|
844
|
-
# Run user script in
|
|
654
|
+
# Run user script in local mode with logmech as 'TD2'.
|
|
845
655
|
>>> sto.test_script(script_args="4 5 10 6 480", password="alice", logmech="TD2")
|
|
846
656
|
|
|
847
|
-
# Run user script in
|
|
657
|
+
# Run user script in local mode with logmech as 'TDNEGO'.
|
|
848
658
|
>>> sto.test_script(script_args="4 5 10 6 480", password="alice", logmech="TDNEGO")
|
|
849
659
|
|
|
850
|
-
# Run user script in
|
|
660
|
+
# Run user script in local mode with logmech as 'LDAP'.
|
|
851
661
|
>>> sto.test_script(script_args="4 5 10 6 480", password="alice", logmech="LDAP")
|
|
852
662
|
|
|
853
|
-
# Run user script in
|
|
663
|
+
# Run user script in local mode with logmech as 'KRB5'.
|
|
854
664
|
>>> sto.test_script(script_args="4 5 10 6 480", password="alice", logmech="KRB5")
|
|
855
665
|
|
|
856
|
-
# Run user script in
|
|
666
|
+
# Run user script in local mode with logmech as 'JWT'.
|
|
857
667
|
>>> sto.test_script(script_args="4 5 10 6 480", password="alice",
|
|
858
668
|
logmech='JWT', logdata='token=eyJpc...h8dA')
|
|
859
669
|
|
|
@@ -866,8 +676,7 @@ class TableOperator:
|
|
|
866
676
|
awu_matrix_test.append((["input_data_file", input_data_file, True, (str), True]))
|
|
867
677
|
awu_matrix_test.append((["script_args", script_args, True, (str), False]))
|
|
868
678
|
awu_matrix_test.append((["exec_mode", exec_mode, True, (str), True,
|
|
869
|
-
[TableOperatorConstants.
|
|
870
|
-
TableOperatorConstants.LOCAL_EXEC.value]]))
|
|
679
|
+
[TableOperatorConstants.LOCAL_EXEC.value]]))
|
|
871
680
|
|
|
872
681
|
data_row_limit = kwargs.pop("data_row_limit", 1000)
|
|
873
682
|
awu_matrix_test.append((["data_row_limit", data_row_limit, True, (int), True]))
|
|
@@ -883,9 +692,6 @@ class TableOperator:
|
|
|
883
692
|
data_file_header = kwargs.pop("data_file_header", True)
|
|
884
693
|
awu_matrix_test.append((["data_file_header", data_file_header, True, (bool)]))
|
|
885
694
|
|
|
886
|
-
timeout = kwargs.pop("timeout", 5000)
|
|
887
|
-
awu_matrix_test.append((["timeout", timeout, True, (int), True]))
|
|
888
|
-
|
|
889
695
|
logmech = kwargs.pop("logmech", "TD2")
|
|
890
696
|
awu_matrix_test.append(
|
|
891
697
|
["logmech", logmech, True, (str), True, logmech_valid_values])
|
|
@@ -896,9 +702,6 @@ class TableOperator:
|
|
|
896
702
|
# Validate argument types.
|
|
897
703
|
_Validators._validate_function_arguments(awu_matrix_test)
|
|
898
704
|
|
|
899
|
-
# Validate timeout value.
|
|
900
|
-
_Validators._validate_positive_int(timeout, "timeout")
|
|
901
|
-
|
|
902
705
|
self._validate()
|
|
903
706
|
|
|
904
707
|
if logmech == "JWT" and not logdata:
|
|
@@ -914,14 +717,8 @@ class TableOperator:
|
|
|
914
717
|
# Either of 'input_data_file' or 'password' argument is required.
|
|
915
718
|
password = kwargs.pop("password", None)
|
|
916
719
|
|
|
917
|
-
# The check of EITHER_THIS_OR_THAT_ARGUMENT is applicable only when the exec_mode is sandbox.
|
|
918
|
-
# Hence adding the check exec_mode != "local".
|
|
919
720
|
# When exec_mode is local, the connection object is used to get the values in the table.
|
|
920
|
-
if exec_mode
|
|
921
|
-
message = Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
|
|
922
|
-
"input_data_file", "Script data and password")
|
|
923
|
-
raise TeradataMlException(message, MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
|
|
924
|
-
elif exec_mode == "local" and not (input_data_file or self.data):
|
|
721
|
+
if exec_mode == "local" and not (input_data_file or self.data):
|
|
925
722
|
message = Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT,
|
|
926
723
|
"input_data_file", "Script data")
|
|
927
724
|
raise TeradataMlException(message, MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
|
|
@@ -984,171 +781,6 @@ class TableOperator:
|
|
|
984
781
|
return self.__process_test_script_output(exec_cmd_output)
|
|
985
782
|
except Exception as exp:
|
|
986
783
|
raise
|
|
987
|
-
else:
|
|
988
|
-
# Execution Mode - sandbox.
|
|
989
|
-
|
|
990
|
-
# get the frame object of the function.
|
|
991
|
-
import inspect
|
|
992
|
-
frame = inspect.currentframe()
|
|
993
|
-
|
|
994
|
-
# Validate argument types.
|
|
995
|
-
_Validators._validate_module_presence('docker', frame.f_code.co_name)
|
|
996
|
-
|
|
997
|
-
# Read container_id from configure.sandbox_container_id, if it is None then
|
|
998
|
-
# raise an exception
|
|
999
|
-
container_id = configure.sandbox_container_id
|
|
1000
|
-
if container_id is None:
|
|
1001
|
-
message = Messages.get_message(MessageCodes.SANDBOX_CONTAINER_NOT_FOUND)
|
|
1002
|
-
raise TeradataMlException(message,
|
|
1003
|
-
MessageCodes.SANDBOX_CONTAINER_NOT_FOUND)
|
|
1004
|
-
|
|
1005
|
-
# Set path inside docker container. This is where files will be copied to.
|
|
1006
|
-
# os.path.join() will not work here because the path is not dependent on
|
|
1007
|
-
# client platform. Sandbox environment is linux based here.
|
|
1008
|
-
_path_in_docker_container = "/home/tdatuser"
|
|
1009
|
-
user_script_path = "{}/{}".format(_path_in_docker_container, self.script_name)
|
|
1010
|
-
|
|
1011
|
-
if input_data_file is not None:
|
|
1012
|
-
input_file_name = os.path.basename(input_data_file)
|
|
1013
|
-
input_file_path = "{}/{}".format(_path_in_docker_container,
|
|
1014
|
-
input_file_name)
|
|
1015
|
-
# Create script_executor.
|
|
1016
|
-
self._create_executor_script(user_script_path=user_script_path,
|
|
1017
|
-
user_script_args=script_args,
|
|
1018
|
-
data_file_path=input_file_path,
|
|
1019
|
-
data_file_delimiter=data_file_delimiter,
|
|
1020
|
-
data_file_quote_char=data_file_quote_char,
|
|
1021
|
-
data_file_header=data_file_header)
|
|
1022
|
-
else:
|
|
1023
|
-
# Read input from db.
|
|
1024
|
-
if self.data.shape[0] > data_row_limit:
|
|
1025
|
-
raise ValueError(
|
|
1026
|
-
Messages.get_message(MessageCodes.DATAFRAME_LIMIT_ERROR,
|
|
1027
|
-
'data_row_limit', 'data_row_limit',
|
|
1028
|
-
data_row_limit))
|
|
1029
|
-
db_host = get_context().url.host
|
|
1030
|
-
|
|
1031
|
-
user_name = get_context().url.username
|
|
1032
|
-
|
|
1033
|
-
if not self.data._table_name:
|
|
1034
|
-
self.data._table_name = df_utils._execute_node_return_db_object_name(
|
|
1035
|
-
self.data._nodeid, self.data._metaexpr)
|
|
1036
|
-
table_name = UtilFuncs._extract_table_name(self.data._table_name)
|
|
1037
|
-
|
|
1038
|
-
db_name = _get_current_databasename()
|
|
1039
|
-
|
|
1040
|
-
# Create script_executor.
|
|
1041
|
-
self._create_executor_script(user_script_path=user_script_path,
|
|
1042
|
-
user_script_args=script_args,
|
|
1043
|
-
db_host=db_host,
|
|
1044
|
-
user_name=user_name,
|
|
1045
|
-
passwd=password,
|
|
1046
|
-
table_name=table_name,
|
|
1047
|
-
db_name=db_name,
|
|
1048
|
-
logmech=logmech,
|
|
1049
|
-
logdata=logdata)
|
|
1050
|
-
|
|
1051
|
-
import docker
|
|
1052
|
-
client = docker.APIClient(timeout=timeout)
|
|
1053
|
-
|
|
1054
|
-
# Copy files to container indicated in configure.sandbox_container_id.
|
|
1055
|
-
files_to_copy = [self.script_name]
|
|
1056
|
-
|
|
1057
|
-
if supporting_files is not None:
|
|
1058
|
-
if isinstance(supporting_files, str):
|
|
1059
|
-
supporting_files = [supporting_files]
|
|
1060
|
-
|
|
1061
|
-
if len(supporting_files) == 0 \
|
|
1062
|
-
or any(file in [None, "None", ""] for file in supporting_files):
|
|
1063
|
-
raise ValueError(
|
|
1064
|
-
Messages.get_message(MessageCodes.LIST_SELECT_NONE_OR_EMPTY,
|
|
1065
|
-
'supporting_files'))
|
|
1066
|
-
else:
|
|
1067
|
-
files_to_copy.extend(supporting_files)
|
|
1068
|
-
|
|
1069
|
-
if input_data_file is not None:
|
|
1070
|
-
files_to_copy.append(input_data_file)
|
|
1071
|
-
|
|
1072
|
-
for filename in files_to_copy:
|
|
1073
|
-
file_path = os.path.join(self.files_local_path, filename)
|
|
1074
|
-
# Check if file exists.
|
|
1075
|
-
_Validators._validate_file_exists(file_path)
|
|
1076
|
-
|
|
1077
|
-
# Copy file to docker container.
|
|
1078
|
-
|
|
1079
|
-
self._copy_to_docker_container(client, file_path,
|
|
1080
|
-
_path_in_docker_container,
|
|
1081
|
-
container_id)
|
|
1082
|
-
|
|
1083
|
-
# Copy script_executor to docker container.
|
|
1084
|
-
self._copy_to_docker_container(client, self.script_path,
|
|
1085
|
-
_path_in_docker_container,
|
|
1086
|
-
container_id)
|
|
1087
|
-
|
|
1088
|
-
script_executor_file_name = os.path.basename(self.script_path)
|
|
1089
|
-
exec_cmd = ("python3 {0}/{1}".format(_path_in_docker_container,
|
|
1090
|
-
script_executor_file_name))
|
|
1091
|
-
|
|
1092
|
-
try:
|
|
1093
|
-
# Setup an exec instance in the container.
|
|
1094
|
-
exec_cmd_create = client.exec_create(container_id, exec_cmd)
|
|
1095
|
-
|
|
1096
|
-
# Start exec instance and run user script.
|
|
1097
|
-
exec_cmd_output = client.exec_start(exec_cmd_create, demux=True)
|
|
1098
|
-
|
|
1099
|
-
# Inspect the output for success or failure.
|
|
1100
|
-
inspect_out = client.exec_inspect(exec_cmd_create)
|
|
1101
|
-
|
|
1102
|
-
# Extract the exit code.
|
|
1103
|
-
exit_code = inspect_out['ExitCode']
|
|
1104
|
-
|
|
1105
|
-
if exec_cmd_output[0] is not None:
|
|
1106
|
-
executor_output = exec_cmd_output[0].decode()
|
|
1107
|
-
|
|
1108
|
-
executor_error = ""
|
|
1109
|
-
if exec_cmd_output[1] is not None:
|
|
1110
|
-
executor_error = exec_cmd_output[1].decode()
|
|
1111
|
-
|
|
1112
|
-
# Exit code 1 indicates any error thrown by subprocess.
|
|
1113
|
-
# Exit code 126 indicates permission problem or command is not executable.
|
|
1114
|
-
# Exit code 127 indicates possible typos in shell script with
|
|
1115
|
-
# unrecognizable characters.
|
|
1116
|
-
if exit_code == 1 or exit_code == 126 or exit_code == 127:
|
|
1117
|
-
message = Messages.get_message(
|
|
1118
|
-
MessageCodes.SANDBOX_SCRIPT_ERROR).format(executor_error)
|
|
1119
|
-
raise TeradataMlException(message,
|
|
1120
|
-
MessageCodes.SANDBOX_SCRIPT_ERROR)
|
|
1121
|
-
# Exit code 2 indicates either username or password is invalid.
|
|
1122
|
-
elif exit_code == 2:
|
|
1123
|
-
message = Messages.get_message(
|
|
1124
|
-
MessageCodes.SANDBOX_CONNECTION_ERROR).format(executor_error)
|
|
1125
|
-
raise TeradataMlException(message,
|
|
1126
|
-
MessageCodes.SANDBOX_CONNECTION_ERROR)
|
|
1127
|
-
# Exit code 3 indicates problem with query.
|
|
1128
|
-
elif exit_code == 3:
|
|
1129
|
-
message = Messages.get_message(
|
|
1130
|
-
MessageCodes.SANDBOX_QUERY_ERROR).format(executor_error)
|
|
1131
|
-
raise TeradataMlException(message,
|
|
1132
|
-
MessageCodes.SANDBOX_QUERY_ERROR)
|
|
1133
|
-
# Exit code 4 indicates all other exceptions / errors.
|
|
1134
|
-
elif exit_code == 4:
|
|
1135
|
-
message = Messages.get_message(
|
|
1136
|
-
MessageCodes.SANDBOX_CONTAINER_ERROR).format(executor_error)
|
|
1137
|
-
raise TeradataMlException(message,
|
|
1138
|
-
MessageCodes.SANDBOX_CONTAINER_ERROR)
|
|
1139
|
-
elif exit_code != 0:
|
|
1140
|
-
# Any error other than exit code 1, 2, 3, 4
|
|
1141
|
-
message = Messages.get_message(
|
|
1142
|
-
MessageCodes.SANDBOX_CONTAINER_ERROR).format(executor_error)
|
|
1143
|
-
raise TeradataMlException(message,
|
|
1144
|
-
MessageCodes.SANDBOX_CONTAINER_ERROR)
|
|
1145
|
-
else:
|
|
1146
|
-
return self.__process_test_script_output(executor_output)
|
|
1147
|
-
except Exception as exp:
|
|
1148
|
-
message = Messages.get_message(
|
|
1149
|
-
MessageCodes.SANDBOX_CONTAINER_ERROR).format(str(exp))
|
|
1150
|
-
raise TeradataMlException(message,
|
|
1151
|
-
MessageCodes.SANDBOX_CONTAINER_ERROR)
|
|
1152
784
|
|
|
1153
785
|
def __local_run_user_script_input_file(self, cmd, input_file_path,
|
|
1154
786
|
data_file_delimiter='\t',
|
|
@@ -1369,248 +1001,207 @@ class TableOperator:
|
|
|
1369
1001
|
|
|
1370
1002
|
return self.__run_user_script_subprocess(cmd, db_data_handle)
|
|
1371
1003
|
|
|
1372
|
-
def
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
table_name=None):
|
|
1004
|
+
def __repr__(self):
|
|
1005
|
+
"""
|
|
1006
|
+
Returns the string representation for the class instance.
|
|
1007
|
+
"""
|
|
1008
|
+
if self.result is None:
|
|
1009
|
+
repr_string = "Result is empty. Please run execute_script first."
|
|
1010
|
+
else:
|
|
1011
|
+
repr_string = "############ STDOUT Output ############"
|
|
1012
|
+
repr_string = "{}\n\n{}".format(repr_string, self.result)
|
|
1013
|
+
return repr_string
|
|
1014
|
+
|
|
1015
|
+
def deploy(self, model_column, partition_columns=None, model_file_prefix=None):
|
|
1385
1016
|
"""
|
|
1386
1017
|
DESCRIPTION:
|
|
1387
|
-
|
|
1388
|
-
|
|
1018
|
+
Function deploys the model generated after `execute_script()` in database or user
|
|
1019
|
+
environment in lake.
|
|
1389
1020
|
|
|
1390
1021
|
PARAMETERS:
|
|
1391
|
-
|
|
1392
|
-
Required Argument.
|
|
1393
|
-
Specifies the path to user script inside docker container.
|
|
1394
|
-
Types: str
|
|
1395
|
-
|
|
1396
|
-
user_script_args:
|
|
1397
|
-
Optional Argument.
|
|
1398
|
-
Specifies command line arguments required by the user script.
|
|
1399
|
-
Types: str
|
|
1400
|
-
|
|
1401
|
-
data_file_path:
|
|
1022
|
+
model_column:
|
|
1402
1023
|
Required Argument.
|
|
1403
|
-
Specifies the
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
Specifies the delimiter used in input data file.
|
|
1409
|
-
Default Value: "\t" (tab)
|
|
1410
|
-
Types: character of length 1
|
|
1411
|
-
|
|
1412
|
-
data_file_quote_char:
|
|
1413
|
-
Optional Argument.
|
|
1414
|
-
Specifies the quote character used in input data file.
|
|
1415
|
-
Default Value: '"'
|
|
1416
|
-
Types: character of length 1
|
|
1417
|
-
|
|
1418
|
-
data_file_header:
|
|
1419
|
-
Optional Argument.
|
|
1420
|
-
Specifies whether the input data file has header.
|
|
1421
|
-
Default Value: True
|
|
1422
|
-
Types: bool
|
|
1423
|
-
|
|
1424
|
-
db_name:
|
|
1425
|
-
Optional Argument.
|
|
1426
|
-
Specifies the current database name.
|
|
1427
|
-
Default Value: None
|
|
1428
|
-
Types: str
|
|
1429
|
-
|
|
1430
|
-
db_host:
|
|
1431
|
-
Optional Argument.
|
|
1432
|
-
Specifies the host name.
|
|
1433
|
-
Default Value: None
|
|
1434
|
-
Types: str
|
|
1435
|
-
|
|
1436
|
-
user_name:
|
|
1437
|
-
Optional Argument.
|
|
1438
|
-
Specifies the user name.
|
|
1439
|
-
Default Value: None
|
|
1024
|
+
Specifies the column name in which model is present.
|
|
1025
|
+
Supported types of model in this column are CLOB and BLOB.
|
|
1026
|
+
Note:
|
|
1027
|
+
The column mentioned in this argument should be present in
|
|
1028
|
+
<apply_obj/script_obj>.result.
|
|
1440
1029
|
Types: str
|
|
1441
1030
|
|
|
1442
|
-
|
|
1031
|
+
partition_columns:
|
|
1443
1032
|
Optional Argument.
|
|
1444
|
-
Specifies the
|
|
1445
|
-
|
|
1446
|
-
|
|
1033
|
+
Specifies the columns on which data is partitioned.
|
|
1034
|
+
Note:
|
|
1035
|
+
The columns mentioned in this argument should be present in
|
|
1036
|
+
<apply_obj/script_obj>.result.
|
|
1037
|
+
Types: str OR list of str
|
|
1447
1038
|
|
|
1448
|
-
|
|
1039
|
+
model_file_prefix:
|
|
1449
1040
|
Optional Argument.
|
|
1450
|
-
Specifies the
|
|
1451
|
-
|
|
1041
|
+
Specifies the prefix to be used to the generated model file.
|
|
1042
|
+
If this argument is None, prefix is auto-generated.
|
|
1043
|
+
If the argument "model_column" contains multiple models and
|
|
1044
|
+
* "partition_columns" is None - model file prefix is appended with
|
|
1045
|
+
underscore(_) and numbers starting from one(1) to get model file
|
|
1046
|
+
names.
|
|
1047
|
+
* "partition_columns" is NOT None - model file prefix is appended
|
|
1048
|
+
with underscore(_) and unique values in partition_columns are joined
|
|
1049
|
+
with underscore(_) to generate model file names.
|
|
1452
1050
|
Types: str
|
|
1453
1051
|
|
|
1454
1052
|
RETURNS:
|
|
1455
|
-
|
|
1053
|
+
List of generated file names.
|
|
1456
1054
|
|
|
1457
1055
|
RAISES:
|
|
1458
|
-
|
|
1056
|
+
TeradatamlException
|
|
1459
1057
|
|
|
1460
1058
|
EXAMPLES:
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1059
|
+
>>> load_example_data("openml", "multi_model_classification")
|
|
1060
|
+
|
|
1061
|
+
>>> df = DataFrame("multi_model_classification")
|
|
1062
|
+
>>> df
|
|
1063
|
+
col2 col3 col4 label group_column partition_column_1 partition_column_2
|
|
1064
|
+
col1
|
|
1065
|
+
-1.013454 0.855765 -0.256920 -0.085301 1 9 0 10
|
|
1066
|
+
-3.146552 -1.805530 -0.071515 -2.093998 0 10 0 10
|
|
1067
|
+
-1.175097 -0.950745 0.018280 -0.895335 1 10 0 11
|
|
1068
|
+
0.218497 -0.968924 0.183037 -0.303142 0 11 0 11
|
|
1069
|
+
-1.471908 -0.029195 -0.166141 -0.645309 1 11 1 10
|
|
1070
|
+
1.082336 0.846357 -0.012063 0.812633 1 11 1 11
|
|
1071
|
+
-1.132068 -1.209750 0.065422 -0.982986 0 10 1 10
|
|
1072
|
+
-0.440339 2.290676 -0.423878 0.749467 1 8 1 10
|
|
1073
|
+
-0.615226 -0.546472 0.017496 -0.488720 0 12 0 10
|
|
1074
|
+
0.579671 -0.573365 0.160603 0.014404 0 9 1 10
|
|
1075
|
+
|
|
1076
|
+
# Install Script file.
|
|
1077
|
+
>>> file_location = os.path.join(os.path.dirname(teradataml.__file__), "data", "scripts", "deploy_script.py")
|
|
1078
|
+
>>> install_file("deploy_script", file_location, replace=True)
|
|
1079
|
+
|
|
1080
|
+
# Variables needed for Script execution.
|
|
1081
|
+
>>> script_command = '/opt/teradata/languages/Python/bin/python3 ./ALICE/deploy_script.py'
|
|
1082
|
+
>>> partition_columns = ["partition_column_1", "partition_column_2"]
|
|
1083
|
+
>>> columns = ["col1", "col2", "col3", "col4", "label",
|
|
1084
|
+
"partition_column_1", "partition_column_2"]
|
|
1085
|
+
>>> returns = OrderedDict([("partition_column_1", INTEGER()),
|
|
1086
|
+
("partition_column_2", INTEGER()),
|
|
1087
|
+
("model", CLOB())])
|
|
1088
|
+
|
|
1089
|
+
# Script execution.
|
|
1090
|
+
>>> obj = Script(data=df.select(columns),
|
|
1091
|
+
script_command=script_command,
|
|
1092
|
+
data_partition_column=partition_columns,
|
|
1093
|
+
returns=returns
|
|
1094
|
+
)
|
|
1095
|
+
>>> opt = obj.execute_script()
|
|
1096
|
+
>>> opt
|
|
1097
|
+
partition_column_1 partition_column_2 model model
|
|
1098
|
+
0 10 b'gAejc1.....drIr'
|
|
1099
|
+
0 11 b'gANjcw.....qWIu'
|
|
1100
|
+
1 10 b'abdwcd.....dWIz'
|
|
1101
|
+
1 11 b'gA4jc4.....agfu'
|
|
1102
|
+
|
|
1103
|
+
# Example 1: Provide only "partition_columns" argument. Here, "model_file_prefix"
|
|
1104
|
+
# is auto generated.
|
|
1105
|
+
>>> obj.deploy(model_column="model",
|
|
1106
|
+
partition_columns=["partition_column_1", "partition_column_2"])
|
|
1107
|
+
>>> ['model_file_1710436227163427__0_10',
|
|
1108
|
+
'model_file_1710436227163427__1_10',
|
|
1109
|
+
'model_file_1710436227163427__0_11',
|
|
1110
|
+
'model_file_1710436227163427__1_11']
|
|
1111
|
+
|
|
1112
|
+
# Example 2: Provide only "model_file_prefix" argument. Here, filenames are suffixed
|
|
1113
|
+
# with 1, 2, 3, ... for multiple models.
|
|
1114
|
+
>>> obj.deploy(model_column="model", model_file_prefix="my_prefix_new_")
|
|
1115
|
+
['my_prefix_new__1',
|
|
1116
|
+
'my_prefix_new__2',
|
|
1117
|
+
'my_prefix_new__3',
|
|
1118
|
+
'my_prefix_new__4']
|
|
1119
|
+
|
|
1120
|
+
# Example 3: Without both "partition_columns" and "model_file_prefix" arguments.
|
|
1121
|
+
>>> obj.deploy(model_column="model")
|
|
1122
|
+
['model_file_1710438346528596__1',
|
|
1123
|
+
'model_file_1710438346528596__2',
|
|
1124
|
+
'model_file_1710438346528596__3',
|
|
1125
|
+
'model_file_1710438346528596__4']
|
|
1126
|
+
|
|
1127
|
+
# Example 4: Provide both "partition_columns" and "model_file_prefix" arguments.
|
|
1128
|
+
>>> obj.deploy(model_column="model", model_file_prefix="my_prefix_new_",
|
|
1129
|
+
partition_columns=["partition_column_1", "partition_column_2"])
|
|
1130
|
+
['my_prefix_new__0_10',
|
|
1131
|
+
'my_prefix_new__0_11',
|
|
1132
|
+
'my_prefix_new__1_10',
|
|
1133
|
+
'my_prefix_new__1_11']
|
|
1134
|
+
|
|
1480
1135
|
"""
|
|
1481
|
-
__data_source = "db"
|
|
1482
|
-
if data_file_path:
|
|
1483
|
-
__data_source = "file"
|
|
1484
1136
|
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1137
|
+
arg_info_matrix = []
|
|
1138
|
+
arg_info_matrix.append(["model_column", model_column, False, (str)])
|
|
1139
|
+
arg_info_matrix.append(["partition_columns", partition_columns, True, (str, list)])
|
|
1140
|
+
arg_info_matrix.append(["model_file_prefix", model_file_prefix, True, (str)])
|
|
1141
|
+
_Validators._validate_function_arguments(arg_info_matrix)
|
|
1490
1142
|
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
UtilFuncs._extract_table_name(temp_script_name), quote='"')
|
|
1143
|
+
if self.result is None:
|
|
1144
|
+
return "Result is empty. Please run execute_script first."
|
|
1494
1145
|
|
|
1495
|
-
|
|
1496
|
-
|
|
1146
|
+
if partition_columns is None:
|
|
1147
|
+
partition_columns = []
|
|
1148
|
+
partition_columns = UtilFuncs._as_list(partition_columns)
|
|
1497
1149
|
|
|
1498
|
-
|
|
1499
|
-
###
|
|
1150
|
+
req_columns = [model_column] + partition_columns
|
|
1500
1151
|
|
|
1501
|
-
|
|
1152
|
+
_Validators._validate_column_exists_in_dataframe(columns=req_columns, metaexpr=self.result._metaexpr)
|
|
1502
1153
|
|
|
1503
|
-
|
|
1504
|
-
|
|
1154
|
+
data = self.result.select(req_columns)
|
|
1155
|
+
data._index_column = None # Without this, first column i.e., model column will be index column.
|
|
1505
1156
|
|
|
1506
|
-
template_dir = os.path.join(os.path.dirname(
|
|
1507
|
-
os.path.dirname(os.path.abspath(__file__))),
|
|
1508
|
-
"table_operators", "templates")
|
|
1509
|
-
try:
|
|
1510
|
-
# Write to the script based on the template.
|
|
1511
|
-
#
|
|
1512
|
-
from teradataml.common.constants import TableOperatorConstants
|
|
1513
|
-
executor_file = os.path.join(template_dir,
|
|
1514
|
-
TableOperatorConstants.SCRIPT_TEMPLATE.value)
|
|
1515
|
-
with open(executor_file, 'r') as input_file:
|
|
1516
|
-
with open(self.script_path, 'w') as output_file:
|
|
1517
|
-
os.chmod(self.script_path, 0o644)
|
|
1518
|
-
output_file.write(
|
|
1519
|
-
input_file.read().format(
|
|
1520
|
-
DATA_SOURCE=UtilFuncs._serialize_and_encode(__data_source),
|
|
1521
|
-
DELIMITER=UtilFuncs._serialize_and_encode(self.delimiter),
|
|
1522
|
-
QUOTECHAR=UtilFuncs._serialize_and_encode(self.quotechar),
|
|
1523
|
-
USER_SCRIPT_PATH=UtilFuncs._serialize_and_encode(
|
|
1524
|
-
user_script_path),
|
|
1525
|
-
SCRIPT_ARGS=UtilFuncs._serialize_and_encode(user_script_args),
|
|
1526
|
-
DATA_FILE_PATH=UtilFuncs._serialize_and_encode(
|
|
1527
|
-
data_file_path),
|
|
1528
|
-
INPUT_DATA_FILE_DELIMITER=UtilFuncs._serialize_and_encode(
|
|
1529
|
-
data_file_delimiter),
|
|
1530
|
-
INPUT_DATA_FILE_QUOTE_CHAR=UtilFuncs._serialize_and_encode(
|
|
1531
|
-
data_file_quote_char),
|
|
1532
|
-
INPUT_DATA_FILE_HEADER=UtilFuncs._serialize_and_encode(
|
|
1533
|
-
data_file_header),
|
|
1534
|
-
DB_HOST=UtilFuncs._serialize_and_encode(db_host),
|
|
1535
|
-
DB_USER=UtilFuncs._serialize_and_encode(user_name),
|
|
1536
|
-
DB_PASS=UtilFuncs._serialize_and_encode(passwd),
|
|
1537
|
-
DB_NAME=UtilFuncs._serialize_and_encode(db_name),
|
|
1538
|
-
TABLE_NAME=UtilFuncs._serialize_and_encode(table_name),
|
|
1539
|
-
LOGMECH=UtilFuncs._serialize_and_encode(logmech),
|
|
1540
|
-
LOGDATA=UtilFuncs._serialize_and_encode(logdata)
|
|
1541
|
-
))
|
|
1542
|
-
except Exception:
|
|
1543
|
-
# Cleanup if we end up here in case of an error.
|
|
1544
|
-
GarbageCollector._delete_local_file(self.script_path)
|
|
1545
|
-
raise
|
|
1546
|
-
|
|
1547
|
-
def _copy_to_docker_container(self, client,
|
|
1548
|
-
local_file_path,
|
|
1549
|
-
path_in_docker_container,
|
|
1550
|
-
container):
|
|
1551
|
-
"""
|
|
1552
|
-
DESCRIPTION:
|
|
1553
|
-
Function to copy files to docker container.
|
|
1554
1157
|
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1158
|
+
if model_file_prefix is None:
|
|
1159
|
+
timestamp = time.time()
|
|
1160
|
+
tmp = "{}{}".format(floor(timestamp / 1000000),
|
|
1161
|
+
floor(timestamp % 1000000 * 1000000 +
|
|
1162
|
+
int(str(uuid.uuid4().fields[-1])[:10])))
|
|
1163
|
+
model_file_prefix = f"model_file_{tmp}_"
|
|
1560
1164
|
|
|
1561
|
-
|
|
1562
|
-
Required Argument.
|
|
1563
|
-
Specifies the path to the file to be copied.
|
|
1564
|
-
Types: str
|
|
1165
|
+
vals = data.get_values()
|
|
1565
1166
|
|
|
1566
|
-
|
|
1567
|
-
Required Argument.
|
|
1568
|
-
Specifies destination path in the docker container where file will be
|
|
1569
|
-
copied to.
|
|
1570
|
-
Types: str
|
|
1167
|
+
model_column_type = data._td_column_names_and_sqlalchemy_types[model_column.lower()].__class__.__name__
|
|
1571
1168
|
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
Specifies container id.
|
|
1575
|
-
Types: str
|
|
1169
|
+
n_models = len(vals)
|
|
1170
|
+
all_files = []
|
|
1576
1171
|
|
|
1577
|
-
|
|
1578
|
-
|
|
1172
|
+
for i, row in enumerate(vals):
|
|
1173
|
+
model = row[0]
|
|
1174
|
+
partition_values = ""
|
|
1175
|
+
if partition_columns:
|
|
1176
|
+
partition_values = "_".join([str(x) for x in row[1:]])
|
|
1177
|
+
elif n_models > 1:
|
|
1178
|
+
partition_values = str(i+1)
|
|
1579
1179
|
|
|
1580
|
-
|
|
1581
|
-
|
|
1180
|
+
model_file = f"{model_file_prefix}_{partition_values}"
|
|
1181
|
+
model_file_path = os.path.join(os.path.expanduser("~"), ".teradataml", model_file)
|
|
1582
1182
|
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
data = open(tar_file_path, 'rb').read()
|
|
1183
|
+
if model_column_type == "CLOB":
|
|
1184
|
+
import base64
|
|
1185
|
+
model = base64.b64decode(model.partition("'")[2])
|
|
1186
|
+
elif model_column_type == "BLOB":
|
|
1187
|
+
# No operation needed.
|
|
1188
|
+
# Apply model training returns BLOB type.
|
|
1189
|
+
pass
|
|
1190
|
+
else:
|
|
1191
|
+
raise ValueError(f"Model column type {model_column_type} is not supported.")
|
|
1593
1192
|
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
copy_status = client.put_archive(container, path_in_docker_container, data)
|
|
1597
|
-
os.remove(tar_file_path)
|
|
1193
|
+
with open(model_file_path, "wb") as f:
|
|
1194
|
+
f.write(model)
|
|
1598
1195
|
|
|
1599
|
-
if
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1196
|
+
if self.__class__.__name__ == "Script":
|
|
1197
|
+
from teradataml import install_file
|
|
1198
|
+
install_file(file_identifier=model_file, file_path=model_file_path,
|
|
1199
|
+
is_binary=True, suppress_output=True)
|
|
1200
|
+
elif self.__class__.__name__ == "Apply":
|
|
1201
|
+
self.env.install_file(file_name=model_file_path)
|
|
1605
1202
|
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
"""
|
|
1610
|
-
if self.result is None:
|
|
1611
|
-
repr_string = "Result is empty. Please run execute_script first."
|
|
1612
|
-
else:
|
|
1613
|
-
repr_string = "############ STDOUT Output ############"
|
|
1614
|
-
repr_string = "{}\n\n{}".format(repr_string, self.result)
|
|
1615
|
-
return repr_string
|
|
1203
|
+
all_files.append(model_file)
|
|
1204
|
+
|
|
1205
|
+
os.remove(model_file_path)
|
|
1616
1206
|
|
|
1207
|
+
return all_files
|