teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +238 -1
- teradataml/__init__.py +13 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/Transformations.py +4 -4
- teradataml/analytics/__init__.py +0 -2
- teradataml/analytics/analytic_function_executor.py +3 -0
- teradataml/analytics/json_parser/utils.py +13 -12
- teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
- teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
- teradataml/analytics/sqle/__init__.py +0 -13
- teradataml/analytics/utils.py +1 -0
- teradataml/analytics/valib.py +3 -0
- teradataml/automl/__init__.py +1628 -0
- teradataml/automl/custom_json_utils.py +1270 -0
- teradataml/automl/data_preparation.py +993 -0
- teradataml/automl/data_transformation.py +727 -0
- teradataml/automl/feature_engineering.py +1648 -0
- teradataml/automl/feature_exploration.py +547 -0
- teradataml/automl/model_evaluation.py +163 -0
- teradataml/automl/model_training.py +887 -0
- teradataml/catalog/__init__.py +0 -2
- teradataml/catalog/byom.py +49 -6
- teradataml/catalog/function_argument_mapper.py +0 -2
- teradataml/catalog/model_cataloging_utils.py +2 -1021
- teradataml/common/aed_utils.py +6 -2
- teradataml/common/constants.py +50 -58
- teradataml/common/deprecations.py +160 -0
- teradataml/common/garbagecollector.py +61 -104
- teradataml/common/messagecodes.py +27 -36
- teradataml/common/messages.py +11 -15
- teradataml/common/utils.py +205 -287
- teradataml/common/wrapper_utils.py +1 -110
- teradataml/context/context.py +150 -78
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
- teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
- teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
- teradataml/data/fish.csv +160 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/insurance.csv +1 -1
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
- teradataml/data/load_example_data.py +3 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/openml_example.json +63 -0
- teradataml/data/scripts/deploy_script.py +65 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
- teradataml/data/templates/open_source_ml.json +9 -0
- teradataml/data/teradataml_example.json +73 -1
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/dataframe/copy_to.py +79 -13
- teradataml/dataframe/data_transfer.py +8 -0
- teradataml/dataframe/dataframe.py +910 -311
- teradataml/dataframe/dataframe_utils.py +102 -5
- teradataml/dataframe/fastload.py +11 -3
- teradataml/dataframe/setop.py +15 -2
- teradataml/dataframe/sql.py +3735 -77
- teradataml/dataframe/sql_function_parameters.py +56 -5
- teradataml/dataframe/vantage_function_types.py +45 -1
- teradataml/dataframe/window.py +30 -29
- teradataml/dbutils/dbutils.py +18 -1
- teradataml/geospatial/geodataframe.py +18 -7
- teradataml/geospatial/geodataframecolumn.py +5 -0
- teradataml/hyperparameter_tuner/optimizer.py +910 -120
- teradataml/hyperparameter_tuner/utils.py +131 -37
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/sklearn/__init__.py +1 -0
- teradataml/opensource/sklearn/_class.py +255 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
- teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
- teradataml/opensource/sklearn/constants.py +54 -0
- teradataml/options/__init__.py +3 -6
- teradataml/options/configure.py +21 -20
- teradataml/scriptmgmt/UserEnv.py +61 -5
- teradataml/scriptmgmt/lls_utils.py +135 -53
- teradataml/table_operators/Apply.py +38 -6
- teradataml/table_operators/Script.py +45 -308
- teradataml/table_operators/TableOperator.py +182 -591
- teradataml/table_operators/__init__.py +0 -1
- teradataml/table_operators/table_operator_util.py +32 -40
- teradataml/utils/validators.py +127 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
- teradataml/analytics/mle/AdaBoost.py +0 -651
- teradataml/analytics/mle/AdaBoostPredict.py +0 -564
- teradataml/analytics/mle/Antiselect.py +0 -342
- teradataml/analytics/mle/Arima.py +0 -641
- teradataml/analytics/mle/ArimaPredict.py +0 -477
- teradataml/analytics/mle/Attribution.py +0 -1070
- teradataml/analytics/mle/Betweenness.py +0 -658
- teradataml/analytics/mle/Burst.py +0 -711
- teradataml/analytics/mle/CCM.py +0 -600
- teradataml/analytics/mle/CCMPrepare.py +0 -324
- teradataml/analytics/mle/CFilter.py +0 -460
- teradataml/analytics/mle/ChangePointDetection.py +0 -572
- teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
- teradataml/analytics/mle/Closeness.py +0 -737
- teradataml/analytics/mle/ConfusionMatrix.py +0 -420
- teradataml/analytics/mle/Correlation.py +0 -477
- teradataml/analytics/mle/Correlation2.py +0 -573
- teradataml/analytics/mle/CoxHazardRatio.py +0 -679
- teradataml/analytics/mle/CoxPH.py +0 -556
- teradataml/analytics/mle/CoxSurvival.py +0 -478
- teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
- teradataml/analytics/mle/DTW.py +0 -623
- teradataml/analytics/mle/DWT.py +0 -564
- teradataml/analytics/mle/DWT2D.py +0 -599
- teradataml/analytics/mle/DecisionForest.py +0 -716
- teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
- teradataml/analytics/mle/DecisionForestPredict.py +0 -561
- teradataml/analytics/mle/DecisionTree.py +0 -830
- teradataml/analytics/mle/DecisionTreePredict.py +0 -528
- teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
- teradataml/analytics/mle/FMeasure.py +0 -402
- teradataml/analytics/mle/FPGrowth.py +0 -734
- teradataml/analytics/mle/FrequentPaths.py +0 -695
- teradataml/analytics/mle/GLM.py +0 -558
- teradataml/analytics/mle/GLML1L2.py +0 -547
- teradataml/analytics/mle/GLML1L2Predict.py +0 -519
- teradataml/analytics/mle/GLMPredict.py +0 -529
- teradataml/analytics/mle/HMMDecoder.py +0 -945
- teradataml/analytics/mle/HMMEvaluator.py +0 -901
- teradataml/analytics/mle/HMMSupervised.py +0 -521
- teradataml/analytics/mle/HMMUnsupervised.py +0 -572
- teradataml/analytics/mle/Histogram.py +0 -561
- teradataml/analytics/mle/IDWT.py +0 -476
- teradataml/analytics/mle/IDWT2D.py +0 -493
- teradataml/analytics/mle/IdentityMatch.py +0 -763
- teradataml/analytics/mle/Interpolator.py +0 -918
- teradataml/analytics/mle/KMeans.py +0 -485
- teradataml/analytics/mle/KNN.py +0 -627
- teradataml/analytics/mle/KNNRecommender.py +0 -488
- teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
- teradataml/analytics/mle/LAR.py +0 -439
- teradataml/analytics/mle/LARPredict.py +0 -478
- teradataml/analytics/mle/LDA.py +0 -548
- teradataml/analytics/mle/LDAInference.py +0 -492
- teradataml/analytics/mle/LDATopicSummary.py +0 -464
- teradataml/analytics/mle/LevenshteinDistance.py +0 -450
- teradataml/analytics/mle/LinReg.py +0 -433
- teradataml/analytics/mle/LinRegPredict.py +0 -438
- teradataml/analytics/mle/MinHash.py +0 -544
- teradataml/analytics/mle/Modularity.py +0 -587
- teradataml/analytics/mle/NEREvaluator.py +0 -410
- teradataml/analytics/mle/NERExtractor.py +0 -595
- teradataml/analytics/mle/NERTrainer.py +0 -458
- teradataml/analytics/mle/NGrams.py +0 -570
- teradataml/analytics/mle/NPath.py +0 -634
- teradataml/analytics/mle/NTree.py +0 -549
- teradataml/analytics/mle/NaiveBayes.py +0 -462
- teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
- teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
- teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
- teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
- teradataml/analytics/mle/NamedEntityFinder.py +0 -529
- teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
- teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
- teradataml/analytics/mle/POSTagger.py +0 -417
- teradataml/analytics/mle/Pack.py +0 -411
- teradataml/analytics/mle/PageRank.py +0 -535
- teradataml/analytics/mle/PathAnalyzer.py +0 -426
- teradataml/analytics/mle/PathGenerator.py +0 -367
- teradataml/analytics/mle/PathStart.py +0 -464
- teradataml/analytics/mle/PathSummarizer.py +0 -470
- teradataml/analytics/mle/Pivot.py +0 -471
- teradataml/analytics/mle/ROC.py +0 -425
- teradataml/analytics/mle/RandomSample.py +0 -637
- teradataml/analytics/mle/RandomWalkSample.py +0 -490
- teradataml/analytics/mle/SAX.py +0 -779
- teradataml/analytics/mle/SVMDense.py +0 -677
- teradataml/analytics/mle/SVMDensePredict.py +0 -536
- teradataml/analytics/mle/SVMDenseSummary.py +0 -437
- teradataml/analytics/mle/SVMSparse.py +0 -557
- teradataml/analytics/mle/SVMSparsePredict.py +0 -553
- teradataml/analytics/mle/SVMSparseSummary.py +0 -435
- teradataml/analytics/mle/Sampling.py +0 -549
- teradataml/analytics/mle/Scale.py +0 -565
- teradataml/analytics/mle/ScaleByPartition.py +0 -496
- teradataml/analytics/mle/ScaleMap.py +0 -378
- teradataml/analytics/mle/ScaleSummary.py +0 -320
- teradataml/analytics/mle/SentenceExtractor.py +0 -363
- teradataml/analytics/mle/SentimentEvaluator.py +0 -432
- teradataml/analytics/mle/SentimentExtractor.py +0 -578
- teradataml/analytics/mle/SentimentTrainer.py +0 -405
- teradataml/analytics/mle/SeriesSplitter.py +0 -641
- teradataml/analytics/mle/Sessionize.py +0 -475
- teradataml/analytics/mle/SimpleMovAvg.py +0 -397
- teradataml/analytics/mle/StringSimilarity.py +0 -425
- teradataml/analytics/mle/TF.py +0 -389
- teradataml/analytics/mle/TFIDF.py +0 -504
- teradataml/analytics/mle/TextChunker.py +0 -414
- teradataml/analytics/mle/TextClassifier.py +0 -399
- teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
- teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
- teradataml/analytics/mle/TextMorph.py +0 -494
- teradataml/analytics/mle/TextParser.py +0 -623
- teradataml/analytics/mle/TextTagger.py +0 -530
- teradataml/analytics/mle/TextTokenizer.py +0 -502
- teradataml/analytics/mle/UnivariateStatistics.py +0 -488
- teradataml/analytics/mle/Unpack.py +0 -526
- teradataml/analytics/mle/Unpivot.py +0 -438
- teradataml/analytics/mle/VarMax.py +0 -776
- teradataml/analytics/mle/VectorDistance.py +0 -762
- teradataml/analytics/mle/WeightedMovAvg.py +0 -400
- teradataml/analytics/mle/XGBoost.py +0 -842
- teradataml/analytics/mle/XGBoostPredict.py +0 -627
- teradataml/analytics/mle/__init__.py +0 -123
- teradataml/analytics/mle/json/adaboost_mle.json +0 -135
- teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
- teradataml/analytics/mle/json/antiselect_mle.json +0 -34
- teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
- teradataml/analytics/mle/json/arima_mle.json +0 -172
- teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
- teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
- teradataml/analytics/mle/json/betweenness_mle.json +0 -97
- teradataml/analytics/mle/json/burst_mle.json +0 -140
- teradataml/analytics/mle/json/ccm_mle.json +0 -124
- teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
- teradataml/analytics/mle/json/cfilter_mle.json +0 -93
- teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
- teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
- teradataml/analytics/mle/json/closeness_mle.json +0 -104
- teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
- teradataml/analytics/mle/json/correlation_mle.json +0 -86
- teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
- teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
- teradataml/analytics/mle/json/coxph_mle.json +0 -98
- teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
- teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
- teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
- teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
- teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
- teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
- teradataml/analytics/mle/json/dtw_mle.json +0 -97
- teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
- teradataml/analytics/mle/json/dwt_mle.json +0 -101
- teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
- teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
- teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
- teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
- teradataml/analytics/mle/json/glm_mle.json +0 -111
- teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
- teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
- teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
- teradataml/analytics/mle/json/histogram_mle.json +0 -100
- teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
- teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
- teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
- teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
- teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
- teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
- teradataml/analytics/mle/json/idwt_mle.json +0 -66
- teradataml/analytics/mle/json/interpolator_mle.json +0 -151
- teradataml/analytics/mle/json/kmeans_mle.json +0 -97
- teradataml/analytics/mle/json/knn_mle.json +0 -141
- teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
- teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
- teradataml/analytics/mle/json/lar_mle.json +0 -78
- teradataml/analytics/mle/json/larpredict_mle.json +0 -69
- teradataml/analytics/mle/json/lda_mle.json +0 -130
- teradataml/analytics/mle/json/ldainference_mle.json +0 -78
- teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
- teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
- teradataml/analytics/mle/json/linreg_mle.json +0 -42
- teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
- teradataml/analytics/mle/json/minhash_mle.json +0 -113
- teradataml/analytics/mle/json/modularity_mle.json +0 -91
- teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
- teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
- teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
- teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
- teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
- teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
- teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
- teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
- teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
- teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
- teradataml/analytics/mle/json/ngrams_mle.json +0 -137
- teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
- teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
- teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
- teradataml/analytics/mle/json/pack_mle.json +0 -58
- teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
- teradataml/analytics/mle/json/pagerank_mle.json +0 -81
- teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
- teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
- teradataml/analytics/mle/json/pathstart_mle.json +0 -62
- teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
- teradataml/analytics/mle/json/pivoting_mle.json +0 -71
- teradataml/analytics/mle/json/postagger_mle.json +0 -51
- teradataml/analytics/mle/json/randomsample_mle.json +0 -131
- teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
- teradataml/analytics/mle/json/roc_mle.json +0 -73
- teradataml/analytics/mle/json/sampling_mle.json +0 -75
- teradataml/analytics/mle/json/sax_mle.json +0 -154
- teradataml/analytics/mle/json/scale_mle.json +0 -93
- teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
- teradataml/analytics/mle/json/scalemap_mle.json +0 -44
- teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
- teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
- teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
- teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
- teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
- teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
- teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
- teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
- teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
- teradataml/analytics/mle/json/svmdense_mle.json +0 -165
- teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
- teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
- teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
- teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
- teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
- teradataml/analytics/mle/json/textchunker_mle.json +0 -40
- teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
- teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
- teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
- teradataml/analytics/mle/json/textmorph_mle.json +0 -63
- teradataml/analytics/mle/json/textparser_mle.json +0 -166
- teradataml/analytics/mle/json/texttagger_mle.json +0 -81
- teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
- teradataml/analytics/mle/json/tf_mle.json +0 -33
- teradataml/analytics/mle/json/tfidf_mle.json +0 -34
- teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
- teradataml/analytics/mle/json/unpack_mle.json +0 -91
- teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
- teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
- teradataml/analytics/mle/json/varmax_mle.json +0 -176
- teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
- teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
- teradataml/analytics/mle/json/xgboost_mle.json +0 -178
- teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
- teradataml/analytics/sqle/Antiselect.py +0 -321
- teradataml/analytics/sqle/Attribution.py +0 -603
- teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
- teradataml/analytics/sqle/GLMPredict.py +0 -430
- teradataml/analytics/sqle/MovingAverage.py +0 -543
- teradataml/analytics/sqle/NGramSplitter.py +0 -548
- teradataml/analytics/sqle/NPath.py +0 -632
- teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
- teradataml/analytics/sqle/Pack.py +0 -388
- teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
- teradataml/analytics/sqle/Sessionize.py +0 -390
- teradataml/analytics/sqle/StringSimilarity.py +0 -400
- teradataml/analytics/sqle/Unpack.py +0 -503
- teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
- teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
- teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
- teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
- teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
- teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
- teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
- teradataml/analytics/sqle/json/npath_sqle.json +0 -67
- teradataml/analytics/sqle/json/pack_sqle.json +0 -47
- teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
- teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
- teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
- teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
- teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
- teradataml/catalog/model_cataloging.py +0 -980
- teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
- teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
- teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
- teradataml/table_operators/sandbox_container_util.py +0 -643
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
- {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
teradataml/data/scripts/sklearn/sklearn_fit.py (new file)
@@ -0,0 +1,175 @@
```python
import sys
import numpy as np
import pickle
import base64

DELIMITER = '\t'


def get_value(value):
    ret_val = value
    try:
        ret_val = float(value.replace(' ', ''))
    except Exception as ex:
        # If the value can't be converted to float, then it is a string.
        pass
    return ret_val


def get_values_list(values, ignore_none=True):
    ret_vals = []
    for val in values:
        if val == "" and ignore_none:
            # Empty cell value in the database table.
            continue
        ret_vals.append(get_value(val))
    return ret_vals


def convert_to_type(val, typee):
    if typee == 'int':
        return int(val)
    if typee == 'float':
        val = get_value(val)
        return float(val)
    if typee == 'bool':
        return bool(val)
    return str(val)


def get_classes_as_list(classes, actual_type):
    if classes == "None":
        return None
    if actual_type == "None":
        sys.exit("type of class elements is None where class elements exists.")

    # Class values are separated by '--'.
    classes = classes.split("--")
    for idx, cls in enumerate(classes):
        classes[idx] = convert_to_type(cls, actual_type)
    return classes


def splitter(strr, delim=",", convert_to="str"):
    """
    Split the string based on delimiter and convert to the type specified.
    """
    if strr == "None":
        return []
    return [convert_to_type(i, convert_to) for i in strr.split(delim)]


# Arguments to the Script
if len(sys.argv) != 10:
    # 10 command line arguments should be passed to this file.
    # 1. file to be run
    # 2. function name
    # 3. no of feature columns
    # 4. no of class labels
    # 5. comma separated indices of partition columns
    # 6. comma separated types of the partition columns
    # 7. model file prefix used to generate the model file name from partition columns
    # 8. classes (separated by '--') - converted to a list; "None" if no classes exist
    # 9. type of the elements passed in classes; "None" if no classes exist
    # 10. flag for the system type: True means Lake, Enterprise otherwise
    sys.exit("10 command line arguments should be passed: file to be run,"
             " function name, no of feature columns, no of class labels, comma separated indices"
             " and types of partition columns, model file prefix,"
             " classes, type of elements in classes and flag to check lake or enterprise.")

is_lake_system = eval(sys.argv[9])
if not is_lake_system:
    db = sys.argv[0].split("/")[1]
function_name = sys.argv[1]
n_f_cols = int(sys.argv[2])
n_c_labels = int(sys.argv[3])
data_partition_column_types = splitter(sys.argv[5])
data_partition_column_indices = splitter(sys.argv[4], convert_to="int")  # indices are integers.
model_file_prefix = sys.argv[6]
class_type = sys.argv[8]
classes = get_classes_as_list(sys.argv[7], class_type)

model = None

# Data format (n features, k labels, partition columns):
# feature1, feature2, ..., featuren, label1, label2, ..., labelk,
# data_partition_column1, ..., data_partition_columnn
# There can be no labels also.

# Read data from the table through STO and build features and labels.
features = []
labels = []
data_partition_column_values = []

while 1:
    try:
        line = input()
        if line == '':  # Exit if user provides a blank line.
            break
        else:
            values = line.split(DELIMITER)
            features.append(get_values_list(values[:n_f_cols]))
            if n_c_labels > 0:
                labels.append(get_values_list(values[n_f_cols:(n_f_cols + n_c_labels)]))
            if not data_partition_column_values:
                # Partition column values are the same for all rows. Hence, read only once.
                for i, val in enumerate(data_partition_column_indices):
                    data_partition_column_values.append(
                        convert_to_type(values[val], typee=data_partition_column_types[i])
                    )

                # Prepare the corresponding model file name and extract the model.
                partition_join = "_".join([str(x) for x in data_partition_column_values])
                # Replace '-' with '_' because partition column values can be negative.
                partition_join = partition_join.replace("-", "_")

                model_file_path = f"{model_file_prefix}_{partition_join}" \
                    if is_lake_system else \
                    f"./{db}/{model_file_prefix}_{partition_join}"

                with open(model_file_path, "rb") as fp:
                    model = pickle.loads(fp.read())

                if model is None:
                    sys.exit("Model file is not installed in Vantage.")

    except EOFError:  # Exit on EOF or CTRL-D.
        break

if not len(features):
    sys.exit(0)

# Fit/partial_fit the model to the data.
if function_name == "partial_fit":
    if labels and classes:
        model.partial_fit(np.array(features), np.array(labels), classes=classes)
    elif labels:
        model.partial_fit(np.array(features), np.array(labels))
    elif classes:
        model.partial_fit(np.array(features), classes=classes)
    else:
        model.partial_fit(np.array(features))
elif function_name == "fit":
    # For IsotonicRegression, fit() accepts the training target as
    # y: array-like of shape (n_samples,).
    if labels:
        labels = np.array(labels).reshape(-1) \
            if model.__class__.__name__ == "IsotonicRegression" else np.array(labels)
        model.fit(np.array(features), labels)
    else:
        model.fit(np.array(features))

model_str = pickle.dumps(model)

if is_lake_system:
    model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"

# Write to a file in Vantage, to be used in predict/scoring.
with open(model_file_path, "wb") as fp:
    fp.write(model_str)

model_data = model_file_path if is_lake_system \
    else base64.b64encode(model_str)

# Print the model so it can be read back by the client script.
print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
```
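The script reads one partition's rows from stdin, tab-delimited, with feature columns first, then any label columns, then the partition columns; the model to train is unpickled from a file named after the partition values. A minimal local sketch of that flow, not taken from the package, using SGDRegressor as an arbitrary stand-in for the installed model and assuming two float features, one label and one integer partition column:

```python
# Local sketch of the sklearn_fit.py flow (illustrative, not from the package):
# parse tab-delimited rows, fit an unpickled model, emit the encoded result.
import base64
import pickle

import numpy as np
from sklearn.linear_model import SGDRegressor  # arbitrary stand-in model

DELIMITER = '\t'
# feature1, feature2, label, partition column (same partition value per row)
rows = ["1.0\t2.0\t0.5\t7", "3.0\t4.0\t1.5\t7"]

features = [[float(v) for v in r.split(DELIMITER)[:2]] for r in rows]
labels = [float(r.split(DELIMITER)[2]) for r in rows]
partition_value = int(rows[0].split(DELIMITER)[3])

# Stands in for unpickling "<model_file_prefix>_7" from the file system.
model = pickle.loads(pickle.dumps(SGDRegressor()))
model.fit(np.array(features), np.array(labels))

print(partition_value, base64.b64encode(pickle.dumps(model)), sep=DELIMITER)
```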
teradataml/data/scripts/sklearn/sklearn_fit_predict.py (new file)
@@ -0,0 +1,135 @@
```python
import sys
import numpy as np
import pickle
import math

DELIMITER = '\t'


def get_value(value):
    ret_val = value
    try:
        ret_val = float(value.replace(' ', ''))
    except Exception as ex:
        # If the value can't be converted to float, then it is a string.
        pass
    return ret_val


def get_values_list(values, ignore_none=True):
    ret_vals = []
    for val in values:
        if val == "" and ignore_none:
            # Empty cell value in the database table.
            continue
        ret_vals.append(get_value(val))
    return ret_vals


def convert_to_type(val, typee):
    if typee == 'int':
        return int(val)
    if typee == 'float':
        return float(val)
    if typee == 'bool':
        return bool(val)
    return str(val)


def splitter(strr, delim=",", convert_to="str"):
    """
    Split the string based on delimiter and convert to the type specified.
    """
    if strr == "None":
        return []
    return [convert_to_type(i, convert_to) for i in strr.split(delim)]


# Arguments to the Script
if len(sys.argv) != 7:
    # 7 command line arguments should be passed to this file.
    # 1. file to be run
    # 2. no of feature columns
    # 3. no of class labels
    # 4. comma separated indices of partition columns
    # 5. comma separated types of the partition columns
    # 6. model file prefix used to generate the model file name from partition columns
    # 7. flag for the system type: True means Lake, Enterprise otherwise
    sys.exit("7 arguments should be passed to this file - file to be run, "
             "no of feature columns, no of class labels, comma separated indices and types of "
             "partition columns, model file prefix to generate model file using partition "
             "columns and flag to check lake or enterprise.")

is_lake_system = eval(sys.argv[6])
if not is_lake_system:
    db = sys.argv[0].split("/")[1]
n_f_cols = int(sys.argv[1])
n_c_labels = int(sys.argv[2])
model_file_prefix = sys.argv[5]
data_partition_column_types = splitter(sys.argv[4])
data_partition_column_indices = splitter(sys.argv[3], convert_to="int")  # indices are integers.

model = None

# Data format (n features, k labels, partition columns):
# feature1, feature2, ..., featuren, label1, label2, ..., labelk,
# data_partition_column1, ..., data_partition_columnn.
# There can be no labels also.

# Read data from the table through STO and build features and labels.
features = []
labels = []
data_partition_column_values = []

while 1:
    try:
        line = input()
        if line == '':  # Exit if user provides a blank line.
            break
        else:
            values = line.split(DELIMITER)
            features.append(get_values_list(values[:n_f_cols]))
            if n_c_labels > 0:
                labels.append(get_values_list(values[n_f_cols:(n_f_cols + n_c_labels)]))
            if not data_partition_column_values:
                # Partition column values are the same for all rows. Hence, read only once.
                for i, val in enumerate(data_partition_column_indices):
                    data_partition_column_values.append(
                        convert_to_type(values[val], typee=data_partition_column_types[i])
                    )

                # Prepare the corresponding model file name and extract the model.
                partition_join = "_".join([str(x) for x in data_partition_column_values])
                # Replace '-' with '_' because partition column values can be negative.
                partition_join = partition_join.replace("-", "_")

                model_file_path = f"{model_file_prefix}_{partition_join}" \
                    if is_lake_system else \
                    f"./{db}/{model_file_prefix}_{partition_join}"

                with open(model_file_path, "rb") as fp:
                    model = pickle.loads(fp.read())

                if model is None:
                    sys.exit("Model file is not installed in Vantage.")

    except EOFError:  # Exit on EOF or CTRL-D.
        break

if not len(features):
    sys.exit(0)

# Call fit_predict, passing labels when n_c_labels > 0.
if n_c_labels > 0:
    predictions = model.fit_predict(np.array(features), np.array(labels))
else:
    predictions = model.fit_predict(np.array(features))

# Export results to the database through standard output.
for i in range(len(predictions)):
    if n_c_labels > 0:
        # Add labels into the output, if the user passed them.
        result_list = features[i] + labels[i] + [predictions[i]]
    else:
        result_list = features[i] + [predictions[i]]
    print(*(data_partition_column_values +
            ['' if (val is None or math.isnan(val) or math.isinf(val))
             else val for val in result_list]),
          sep=DELIMITER)
```
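One detail worth noting in the output loop above: every echoed value is sanitized so that None, NaN and inf become empty fields, which the database reads back as NULLs (an assumption about the intent). The idiom in isolation:

```python
# Minimal sketch of the NULL-sanitizing print used by sklearn_fit_predict.py.
import math

result_list = [1.0, float('nan'), float('inf'), 3.5]
print(*['' if (val is None or math.isnan(val) or math.isinf(val)) else val
        for val in result_list],
      sep='\t')  # prints: 1.0, two empty fields, 3.5, tab-separated
```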
teradataml/data/scripts/sklearn/sklearn_function.template (new file)
@@ -0,0 +1,113 @@
```python
import sys, json
import pickle, base64, importlib, numpy as np
from collections import OrderedDict

func_name = "<func_name>"
module_name = "<module_name>"
params = json.loads('<params>')

DELIMITER = '\t'


def get_value(value):
    ret_val = value
    try:
        ret_val = float(value.replace(' ', ''))
    except Exception as ex:
        # If the value can't be converted to float, then it is a string.
        pass
    return ret_val


def convert_to_type(val, typee):
    if typee == 'int':
        return int(val)
    if typee == 'float':
        return get_value(val)
    if typee == 'bool':
        return bool(val)
    return str(val)


def splitter(strr, delim=",", convert_to="str"):
    """
    Split the string based on delimiter and convert to the type specified.
    """
    if strr == "None":
        return []
    return [convert_to_type(i, convert_to) for i in strr.split(delim)]


# Arguments to the Script.
if len(sys.argv) != 4:
    # 4 command line arguments should be passed to this file.
    # 1. file to be run
    # 2. comma separated indices of partition columns
    # 3. comma separated types of the partition columns
    # 4. data columns information separated by "--" where each data column information is in the
    #    form "<arg_name>-<comma separated data indices>-<comma separated data types>"
    sys.exit("4 command line arguments should be passed: file to be run,"
             " comma separated indices and types of partition columns, data columns information"
             " separated by '--' where each data column information is in the form"
             " '<arg_name>-<comma separated data indices>-<comma separated data types>'.")

db = sys.argv[0].split("/")[1]
data_partition_column_indices = splitter(sys.argv[1], convert_to="int")  # indices are integers.
data_partition_column_types = splitter(sys.argv[2])

# Data related arguments information of indices and types.
data_args_indices_types = OrderedDict()

# Data related argument values - prepare the dictionary and populate data later.
data_args_values = {}

for data_arg in sys.argv[3].split("--"):
    arg_name, indices, types = data_arg.split("-")
    indices = splitter(indices, convert_to="int")
    types = splitter(types)

    data_args_indices_types[arg_name] = {"indices": indices, "types": types}
    data_args_values[arg_name] = []  # Kept empty for each data arg name; populated later.

data_partition_column_values = []
data_present = False

# Read data - columns information is passed as a command line argument and stored in the
# data_args_indices_types dictionary.
while 1:
    try:
        line = input()
        if line == '':  # Exit if user provides a blank line.
            break
        else:
            data_present = True
            values = line.split(DELIMITER)
            if not data_partition_column_values:
                # Partition column values are the same for all rows. Hence, read only once.
                for i, val in enumerate(data_partition_column_indices):
                    data_partition_column_values.append(
                        convert_to_type(values[val], typee=data_partition_column_types[i])
                    )

            # Prepare the data dictionary containing only arguments related to data.
            for arg_name in data_args_values:
                data_indices = data_args_indices_types[arg_name]["indices"]
                types = data_args_indices_types[arg_name]["types"]
                cur_row = []
                for idx, data_idx in enumerate(data_indices):
                    cur_row.append(convert_to_type(values[data_idx], types[idx]))
                data_args_values[arg_name].append(cur_row)
    except EOFError:  # Exit on EOF or CTRL-D.
        break

if not data_present:
    sys.exit(0)

# Convert the collected data to numpy arrays.
for arg_name in data_args_values:
    np_values = np.array(data_args_values[arg_name])
    data_args_values[arg_name] = np_values

# Combine all arguments.
all_args = {**data_args_values, **params}

module_ = importlib.import_module(module_name)
sklearn_model = getattr(module_, func_name)(**all_args)

print(*(data_partition_column_values + [base64.b64encode(pickle.dumps(sklearn_model))]), sep=DELIMITER)
```
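The <func_name>, <module_name> and <params> placeholders are substituted by the client before the template is installed, so the model-construction step boils down to a dynamic import plus keyword arguments. A sketch of what that step evaluates to, using LinearRegression, sklearn.linear_model and {"fit_intercept": true} as illustrative substitutions rather than values taken from the diff (requires scikit-learn):

```python
# Sketch of the instantiated template's model-construction step
# (the placeholder values below are illustrative assumptions).
import base64
import importlib
import json
import pickle

func_name = "LinearRegression"                  # stands in for "<func_name>"
module_name = "sklearn.linear_model"            # stands in for "<module_name>"
params = json.loads('{"fit_intercept": true}')  # stands in for '<params>'

module_ = importlib.import_module(module_name)
sklearn_model = getattr(module_, func_name)(**params)

print(base64.b64encode(pickle.dumps(sklearn_model)))
```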
teradataml/data/scripts/sklearn/sklearn_model_selection_split.py (new file)
@@ -0,0 +1,158 @@
```python
import pickle
import math
import sys
import numpy as np
import base64

DELIMITER = '\t'


def get_value(value):
    ret_val = value
    try:
        ret_val = round(float("".join(value.split())), 2)
    except Exception as ex:
        # If the value can't be converted to float, then it is a string.
        pass
    return ret_val


def get_values_list(values, ignore_none=True):
    ret_vals = []
    for val in values:
        if val == "" and ignore_none:
            # Empty cell value in the database table.
            continue
        ret_vals.append(get_value(val))
    return ret_vals


def convert_to_type(val, typee):
    if typee == 'int':
        return int(val)
    if typee == 'float':
        return float(val)
    if typee == 'bool':
        return eval(val)
    return str(val)


def splitter(strr, delim=",", convert_to="str"):
    """
    Split the string based on delimiter and convert to the type specified.
    """
    if strr == "None":
        return []
    return [convert_to_type(i, convert_to) for i in strr.split(delim)]


# Arguments to the Script
if len(sys.argv) != 9:
    # 9 command line arguments should be passed to this file.
    # 1. file to be run
    # 2. function name
    # 3. no of feature columns
    # 4. no of class labels
    # 5. no of group columns
    # 6. comma separated indices of partition columns
    # 7. comma separated types of the partition columns
    # 8. model file prefix used to generate the model file name from partition columns
    # 9. flag for the system type: True means Lake, Enterprise otherwise
    sys.exit("9 command line arguments should be passed: file to be run,"
             " function name, no of feature columns, no of class labels, no of group columns,"
             " comma separated indices and types of partition columns, model file prefix to"
             " generate the model file using partition columns and flag to check lake or enterprise.")

is_lake_system = eval(sys.argv[8])
if not is_lake_system:
    db = sys.argv[0].split("/")[1]
function_name = sys.argv[1]
n_f_cols = int(sys.argv[2])
n_c_labels = int(sys.argv[3])
n_g_cols = int(sys.argv[4])
data_partition_column_types = splitter(sys.argv[6])
data_partition_column_indices = splitter(sys.argv[5], convert_to="int")  # indices are integers.
model_file_prefix = sys.argv[7]

model = None
data_partition_column_values = []

# Data format (n features, k labels, partition columns):
# feature1, feature2, ..., featuren, label1, label2, ..., labelk,
# data_partition_column1, ..., data_partition_columnn.
# Labels are optional.

features = []
labels = []
groups = []
while 1:
    try:
        line = input()
        if line == '':  # Exit if user provides a blank line.
            break
        else:
            values = line.split(DELIMITER)
            if not data_partition_column_values:
                # Partition column values are the same for all rows. Hence, read only once.
                for i, val in enumerate(data_partition_column_indices):
                    data_partition_column_values.append(
                        convert_to_type(values[val], typee=data_partition_column_types[i])
                    )

                # Prepare the corresponding model file name and extract the model.
                partition_join = "_".join([str(x) for x in data_partition_column_values])
                # Replace '-' with '_' because partition column values can be negative.
                partition_join = partition_join.replace("-", "_")

                model_file_path = f"{model_file_prefix}_{partition_join}" \
                    if is_lake_system else \
                    f"./{db}/{model_file_prefix}_{partition_join}"

                with open(model_file_path, "rb") as fp:
                    model = pickle.loads(fp.read())

                if not model:
                    sys.exit("Model file is not installed in Vantage.")

            start = 0
            if n_f_cols > 0:
                features.append(get_values_list(values[:n_f_cols]))
                start = start + n_f_cols
            if n_c_labels > 0:
                labels.append(get_values_list(values[start:(start + n_c_labels)]))
                start = start + n_c_labels
            if n_g_cols > 0:
                groups.append(get_values_list(values[start:(start + n_g_cols)]))

    except EOFError:  # Exit on EOF or CTRL-D.
        break

if len(features) == 0:
    sys.exit(0)

features = np.array(features) if len(features) > 0 else None
labels = np.array(labels).flatten() if len(labels) > 0 else None
groups = np.array(groups).flatten() if len(groups) > 0 else None

if function_name == "split":
    # Print both train and test data instead of just indices, unlike sklearn.
    # A generator is recreated from split_id and the split type (train/test) on the client.
    split_id = 1
    for train_idx, test_idx in model.split(features, labels, groups):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = labels[train_idx], labels[test_idx]
        for X, y in zip(X_train, y_train):
            print(*(data_partition_column_values + [split_id, "train"] +
                    ['' if (val is None or math.isnan(val) or math.isinf(val)) else val
                     for val in X] + [y]),
                  sep=DELIMITER)
        for X, y in zip(X_test, y_test):
            print(*(data_partition_column_values + [split_id, "test"] +
                    ['' if (val is None or math.isnan(val) or math.isinf(val)) else val
                     for val in X] + [y]),
                  sep=DELIMITER)
        split_id += 1
else:
    val = getattr(model, function_name)(features, labels, groups)
    print(*(data_partition_column_values + [val]), sep=DELIMITER)
```
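For function_name == "split", the script prints the materialized train and test rows of every fold, tagged with a split id and a split type, instead of returning index arrays the way sklearn does. A minimal sketch of that output shape, assuming the installed model is a two-fold sklearn.model_selection.KFold and the input has two feature columns:

```python
# Sketch of the per-fold output produced by the "split" branch above
# (KFold and the toy data are illustrative assumptions).
import numpy as np
from sklearn.model_selection import KFold

features = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
labels = np.array([0.0, 1.0, 0.0, 1.0])

split_id = 1
for train_idx, test_idx in KFold(n_splits=2).split(features, labels):
    for X, y in zip(features[train_idx], labels[train_idx]):
        print(split_id, "train", *X, y, sep='\t')
    for X, y in zip(features[test_idx], labels[test_idx]):
        print(split_id, "test", *X, y, sep='\t')
    split_id += 1
```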