teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +183 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +2 -2
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +275 -40
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +17 -21
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1553 -319
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +276 -319
- teradataml/automl/data_transformation.py +163 -81
- teradataml/automl/feature_engineering.py +402 -239
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +48 -51
- teradataml/automl/model_training.py +291 -189
- teradataml/catalog/byom.py +8 -8
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +48 -6
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +156 -120
- teradataml/common/messagecodes.py +6 -1
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +103 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +121 -31
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/glm_example.json +28 -1
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +21 -2
- teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
- teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
- teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
- teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +97 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +9 -4
- teradataml/dataframe/data_transfer.py +125 -64
- teradataml/dataframe/dataframe.py +575 -57
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +273 -90
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +740 -18
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +324 -18
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
- teradataml/options/__init__.py +16 -5
- teradataml/options/configure.py +39 -6
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +26 -19
- teradataml/scriptmgmt/lls_utils.py +120 -16
- teradataml/table_operators/Script.py +4 -5
- teradataml/table_operators/TableOperator.py +160 -26
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +41 -3
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -59,5 +59,34 @@
|
|
|
59
59
|
"group_column" : "integer",
|
|
60
60
|
"partition_column_1" : "integer",
|
|
61
61
|
"partition_column_2" : "integer"
|
|
62
|
+
},
|
|
63
|
+
"onehot_encoder_train": {
|
|
64
|
+
"gender" : "varchar(20)",
|
|
65
|
+
"numb" : "integer"
|
|
66
|
+
},
|
|
67
|
+
"customer_segmentation_train": {
|
|
68
|
+
"ID" : "integer",
|
|
69
|
+
"Gender" : "varchar(10)",
|
|
70
|
+
"Ever_Married" : "varchar(10)",
|
|
71
|
+
"Age" : "integer",
|
|
72
|
+
"Graduated" : "varchar(10)",
|
|
73
|
+
"Profession" : "varchar(30)",
|
|
74
|
+
"Work_Experience" : "integer",
|
|
75
|
+
"Spending_Score" : "varchar(10)",
|
|
76
|
+
"Family_Size": "integer",
|
|
77
|
+
"Var_1": "varchar(10)",
|
|
78
|
+
"Segmentation": "varchar(2)"
|
|
79
|
+
},
|
|
80
|
+
"customer_segmentation_test": {
|
|
81
|
+
"ID" : "integer",
|
|
82
|
+
"Gender" : "varchar(10)",
|
|
83
|
+
"Ever_Married" : "varchar(10)",
|
|
84
|
+
"Age" : "integer",
|
|
85
|
+
"Graduated" : "varchar(10)",
|
|
86
|
+
"Profession" : "varchar(30)",
|
|
87
|
+
"Work_Experience" : "integer",
|
|
88
|
+
"Spending_Score" : "varchar(10)",
|
|
89
|
+
"Family_Size": "integer",
|
|
90
|
+
"Var_1": "varchar(10)"
|
|
62
91
|
}
|
|
63
92
|
}
|
|
Binary file
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"TD_TIMECODE","id","val"
|
|
2
|
+
2020-01-01 08:00:00,33,1.2e+02
|
|
3
|
+
2020-02-01 08:00:00,33,1.95e+02
|
|
4
|
+
2020-03-01 08:00:00,33,8e+02
|
|
5
|
+
2020-04-01 08:00:00,33,6.6e+01
|
|
6
|
+
2020-05-01 08:00:00,33,1.44e+02
|
|
7
|
+
2020-06-01 08:00:00,33,2.1e+04
|
|
8
|
+
2020-07-01 08:00:00,33,3.2e+02
|
|
9
|
+
2020-08-01 08:00:00,33,1.44e+02
|
|
10
|
+
2020-09-01 08:00:00,33,2.2e+02
|
|
11
|
+
2020-10-01 08:00:00,33,2.1e+02
|
|
12
|
+
2020-11-01 08:00:00,33,1.34e+02
|
|
13
|
+
2020-12-01 08:00:00,33,1.84e+02
|
|
14
|
+
2020-12-02 08:00:00,33,1.98e+02
|
teradataml/data/sax_example.json
CHANGED
|
@@ -19,5 +19,56 @@
|
|
|
19
19
|
"bathrms" : "real",
|
|
20
20
|
"stories" : "real"
|
|
21
21
|
|
|
22
|
-
}
|
|
22
|
+
},
|
|
23
|
+
"scale_attributes":{
|
|
24
|
+
|
|
25
|
+
"pid" : "integer",
|
|
26
|
+
"attribute_column" : "varchar(150)"
|
|
27
|
+
|
|
28
|
+
},
|
|
29
|
+
"scale_parameters":{
|
|
30
|
+
|
|
31
|
+
"pid" : "integer",
|
|
32
|
+
"parameter_column" : "varchar(150)",
|
|
33
|
+
"value_column" : "varchar(150)"
|
|
34
|
+
|
|
35
|
+
},
|
|
36
|
+
"scale_input_partitioned":{
|
|
37
|
+
"passenger" : "integer",
|
|
38
|
+
"pid" : "integer",
|
|
39
|
+
"survived" : "integer",
|
|
40
|
+
"pclass" : "integer",
|
|
41
|
+
"name" : "varchar(90)",
|
|
42
|
+
"gender" : "varchar(10)",
|
|
43
|
+
"age" : "integer",
|
|
44
|
+
"sibsp" : "integer",
|
|
45
|
+
"parch" : "integer",
|
|
46
|
+
"ticket" : "varchar(20)",
|
|
47
|
+
"fare" : "integer",
|
|
48
|
+
"cabin" : "varchar(20)",
|
|
49
|
+
"embarked" : "varchar(10)"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
},
|
|
53
|
+
|
|
54
|
+
"scale_input_sparse":
|
|
55
|
+
{
|
|
56
|
+
|
|
57
|
+
"passenger" : "integer",
|
|
58
|
+
"attribute_column" : "varchar(20)",
|
|
59
|
+
"attribute_value" : "real"
|
|
60
|
+
|
|
61
|
+
},
|
|
62
|
+
|
|
63
|
+
"scale_input_part_sparse":
|
|
64
|
+
{
|
|
65
|
+
|
|
66
|
+
"pid" : "integer",
|
|
67
|
+
"passenger" : "integer",
|
|
68
|
+
"attribute_column" : "varchar(20)",
|
|
69
|
+
"attribute_value" : "real"
|
|
70
|
+
|
|
23
71
|
}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
pid,passenger,attribute_column,attribute_value
|
|
2
|
+
3,56,age,
|
|
3
|
+
3,56,fare,35.5
|
|
4
|
+
3,63,age,45.0
|
|
5
|
+
3,63,fare,83.475
|
|
6
|
+
3,67,age,29.0
|
|
7
|
+
3,67,fare,10.5
|
|
8
|
+
3,76,age,25.0
|
|
9
|
+
3,76,fare,7.65
|
|
10
|
+
3,93,age,46.0
|
|
11
|
+
3,93,fare,61.175
|
|
12
|
+
1,2,age,38.0
|
|
13
|
+
1,2,fare,71.2833
|
|
14
|
+
1,4,age,35.0
|
|
15
|
+
1,4,fare,53.1
|
|
16
|
+
1,7,age,54.0
|
|
17
|
+
1,7,fare,51.8625
|
|
18
|
+
1,11,age,4.0
|
|
19
|
+
1,11,fare,16.7
|
|
20
|
+
1,12,age,58.0
|
|
21
|
+
1,12,fare,26.55
|
|
22
|
+
2,22,age,34.0
|
|
23
|
+
2,22,fare,13.0
|
|
24
|
+
2,24,age,28.0
|
|
25
|
+
2,24,fare,35.5
|
|
26
|
+
2,32,age,
|
|
27
|
+
2,32,fare,146.5208
|
|
28
|
+
2,53,age,49.0
|
|
29
|
+
2,53,fare,76.7292
|
|
30
|
+
2,55,age,65.0
|
|
31
|
+
2,55,fare,61.9792
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
passenger,pid,survived,pclass,name,gender,age,sibsp,parch,ticket,fare,cabin,embarked
|
|
2
|
+
76,3,0,3,Moen; Mr. Sigurd Hansen,male,25.0,0,0,348123,7.65,F G73,S
|
|
3
|
+
32,2,1,1,Spencer; Mrs. William Augustus (Marie Eugenie),female,,1,0,PC 17569,146.5208,B78,C
|
|
4
|
+
55,2,0,1,Ostby; Mr. Engelhart Cornelius,male,65.0,0,1,113509,61.9792,B30,C
|
|
5
|
+
53,2,1,1,Harper; Mrs. Henry Sleeper (Myna Haxtun),female,49.0,1,0,PC 17572,76.7292,D33,C
|
|
6
|
+
93,3,0,1,Chaffee; Mr. Herbert Fuller,male,46.0,1,0,W.E.P. 5734,61.175,E31,S
|
|
7
|
+
11,1,1,3,Sandstrom; Miss. Marguerite Rut,female,4.0,1,1,PP 9549,16.7,G6,S
|
|
8
|
+
7,1,0,1,McCarthy; Mr. Timothy J,male,54.0,0,0,17463,51.8625,E46,S
|
|
9
|
+
24,2,1,1,Sloper; Mr. William Thompson,male,28.0,0,0,113788,35.5,A6,S
|
|
10
|
+
63,3,0,1,Harris; Mr. Henry Birkhardt,male,45.0,1,0,36973,83.475,C83,S
|
|
11
|
+
22,2,1,2,Beesley; Mr. Lawrence,male,34.0,0,0,248698,13.0,D56,S
|
|
12
|
+
56,3,1,1,Woolner; Mr. Hugh,male,,0,0,19947,35.5,C52,S
|
|
13
|
+
12,1,1,1,Bonnell; Miss. Elizabeth,female,58.0,0,0,113783,26.55,C103,S
|
|
14
|
+
2,1,1,1,Cumings; Mrs. John Bradley (Florence Briggs Thayer),female,38.0,1,0,PC 17599,71.2833,C85,C
|
|
15
|
+
67,3,1,2,Nye; Mrs. (Elizabeth Ramell),female,29.0,0,0,C.A. 29395,10.5,F33,S
|
|
16
|
+
4,1,1,1,Futrelle; Mrs. Jacques Heath (Lily May Peel),female,35.0,1,0,113803,53.1,C123,S
|
|
@@ -27,6 +27,11 @@ def get_values_list(values, ignore_none=True):
|
|
|
27
27
|
|
|
28
28
|
return ret_vals
|
|
29
29
|
|
|
30
|
+
if len(sys.argv) != 2:
|
|
31
|
+
sys.exit("Script command format: python deploy_script.py <enterprise/lake>")
|
|
32
|
+
|
|
33
|
+
vantage_type = sys.argv[1]
|
|
34
|
+
|
|
30
35
|
data_partition_column_values = []
|
|
31
36
|
data_partition_column_indices = [5, 6]
|
|
32
37
|
|
|
@@ -55,11 +60,25 @@ if not len(features):
|
|
|
55
60
|
sys.exit(0)
|
|
56
61
|
|
|
57
62
|
X = np.array(features)
|
|
58
|
-
y = np.array(labels)
|
|
63
|
+
y = np.array(labels).ravel()
|
|
59
64
|
|
|
60
65
|
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
|
|
61
66
|
clf.fit(X, y)
|
|
62
67
|
|
|
63
|
-
|
|
68
|
+
model_str = pickle.dumps(clf)
|
|
69
|
+
|
|
70
|
+
# Prepare the corresponding model file name and extract model.
|
|
71
|
+
partition_join = "_".join([str(x) for x in data_partition_column_values])
|
|
72
|
+
# Replace '-' with '_' as '-' because partition_columns can be negative.
|
|
73
|
+
partition_join = partition_join.replace("-", "_")
|
|
74
|
+
|
|
75
|
+
if vantage_type == "lake":
|
|
76
|
+
model = f"/tmp/sklearn_model_{partition_join}.pickle"
|
|
77
|
+
with open(model, "wb") as fp:
|
|
78
|
+
fp.write(model_str)
|
|
79
|
+
elif vantage_type == "enterprise":
|
|
80
|
+
model = base64.b64encode(model_str)
|
|
81
|
+
else:
|
|
82
|
+
sys.exit("Invalid vantage type. Use either 'lake' or 'enterprise'.")
|
|
64
83
|
|
|
65
84
|
print(*(data_partition_column_values + [model]), sep=DELIMITER)
|
|
@@ -5,35 +5,24 @@ import base64
|
|
|
5
5
|
|
|
6
6
|
DELIMITER = '\t'
|
|
7
7
|
|
|
8
|
-
def
|
|
9
|
-
ret_val = value
|
|
10
|
-
try:
|
|
11
|
-
ret_val = float(value.replace(' ', ''))
|
|
12
|
-
except Exception as ex:
|
|
13
|
-
# If the value can't be converted to float, then it is string.
|
|
14
|
-
pass
|
|
15
|
-
return ret_val
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def get_values_list(values, ignore_none=True):
|
|
8
|
+
def get_values_list(values, types, model_obj):
|
|
19
9
|
ret_vals = []
|
|
20
|
-
for val in values:
|
|
21
|
-
if
|
|
22
|
-
# Empty cell value in the database table.
|
|
10
|
+
for i, val in enumerate(values):
|
|
11
|
+
if type(model_obj).__name__ == "MultiLabelBinarizer" and val == "":
|
|
23
12
|
continue
|
|
24
|
-
ret_vals.append(
|
|
25
|
-
|
|
13
|
+
ret_vals.append(convert_to_type(val, types[i]))
|
|
26
14
|
return ret_vals
|
|
27
15
|
|
|
28
16
|
def convert_to_type(val, typee):
|
|
29
17
|
if typee == 'int':
|
|
30
|
-
return int(val)
|
|
18
|
+
return int(val) if val != "" else np.nan
|
|
31
19
|
if typee == 'float':
|
|
32
|
-
|
|
33
|
-
|
|
20
|
+
if isinstance(val, str):
|
|
21
|
+
val = val.replace(' ', '')
|
|
22
|
+
return float(val) if val != "" else np.nan
|
|
34
23
|
if typee == 'bool':
|
|
35
|
-
return
|
|
36
|
-
return str(val)
|
|
24
|
+
return eval(val) if val != "" else None
|
|
25
|
+
return str(val) if val != "" else None
|
|
37
26
|
|
|
38
27
|
def get_classes_as_list(classes, actual_type):
|
|
39
28
|
if classes == "None":
|
|
@@ -66,14 +55,14 @@ if len(sys.argv) != 10:
|
|
|
66
55
|
# 3. No of feature columns.
|
|
67
56
|
# 4. No of class labels.
|
|
68
57
|
# 5. Comma separated indices of partition columns.
|
|
69
|
-
# 6. Comma separated types of the
|
|
58
|
+
# 6. Comma separated types of all the data columns.
|
|
70
59
|
# 7. Model file prefix to generated model file using partition columns.
|
|
71
60
|
# 8. classes (separated by '--') - should be converted to list. "None" if no classes exists.
|
|
72
61
|
# 9. type of elements in passed in classes. "None" if no classes exists.
|
|
73
62
|
# 10. Flag to check the system type. True, means Lake, Enterprise otherwise
|
|
74
63
|
sys.exit("10 arguments command line arguments should be passed: file to be run,"
|
|
75
64
|
" function name, no of feature columns, no of class labels, comma separated indices"
|
|
76
|
-
"
|
|
65
|
+
" of partition columns, comma separated types of all columns, model file prefix ,"
|
|
77
66
|
" classes, type of elements in classes and flag to check lake or enterprise.")
|
|
78
67
|
|
|
79
68
|
is_lake_system = eval(sys.argv[9])
|
|
@@ -82,12 +71,14 @@ if not is_lake_system:
|
|
|
82
71
|
function_name = sys.argv[1]
|
|
83
72
|
n_f_cols = int(sys.argv[2])
|
|
84
73
|
n_c_labels = int(sys.argv[3])
|
|
85
|
-
|
|
74
|
+
data_column_types = splitter(sys.argv[5], delim="--")
|
|
86
75
|
data_partition_column_indices = splitter(sys.argv[4], convert_to="int") # indices are integers.
|
|
87
76
|
model_file_prefix = sys.argv[6]
|
|
88
77
|
class_type = sys.argv[8]
|
|
89
78
|
classes = get_classes_as_list(sys.argv[7], class_type)
|
|
90
79
|
|
|
80
|
+
data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
|
|
81
|
+
|
|
91
82
|
model = None
|
|
92
83
|
|
|
93
84
|
# Data Format (n_features, k_labels, one data_partition_column):
|
|
@@ -108,9 +99,7 @@ while 1:
|
|
|
108
99
|
break
|
|
109
100
|
else:
|
|
110
101
|
values = line.split(DELIMITER)
|
|
111
|
-
|
|
112
|
-
if n_c_labels > 0:
|
|
113
|
-
labels.append(get_values_list(values[n_f_cols:(n_f_cols+n_c_labels)]))
|
|
102
|
+
|
|
114
103
|
if not data_partition_column_values:
|
|
115
104
|
# Partition column values is same for all rows. Hence, only read once.
|
|
116
105
|
for i, val in enumerate(data_partition_column_indices):
|
|
@@ -133,6 +122,13 @@ while 1:
|
|
|
133
122
|
if model is None:
|
|
134
123
|
sys.exit("Model file is not installed in Vantage.")
|
|
135
124
|
|
|
125
|
+
values = get_values_list(values, data_column_types, model)
|
|
126
|
+
values = values[:-len(data_partition_column_indices)] # Already processed partition columns.
|
|
127
|
+
features.append(values[:n_f_cols])
|
|
128
|
+
if n_c_labels > 0:
|
|
129
|
+
labels.append(values[n_f_cols:(n_f_cols+n_c_labels)])
|
|
130
|
+
|
|
131
|
+
|
|
136
132
|
except EOFError: # Exit if reached EOF or CTRL-D
|
|
137
133
|
break
|
|
138
134
|
|
|
@@ -142,22 +138,29 @@ if not len(features):
|
|
|
142
138
|
# Fit/partial_fit the model to the data.
|
|
143
139
|
if function_name == "partial_fit":
|
|
144
140
|
if labels and classes:
|
|
145
|
-
model.partial_fit(
|
|
141
|
+
model.partial_fit(features, labels, classes=classes)
|
|
146
142
|
elif labels:
|
|
147
|
-
model.partial_fit(
|
|
143
|
+
model.partial_fit(features, labels)
|
|
148
144
|
elif classes:
|
|
149
|
-
model.partial_fit(
|
|
145
|
+
model.partial_fit(features, classes=classes)
|
|
150
146
|
else:
|
|
151
|
-
model.partial_fit(
|
|
147
|
+
model.partial_fit(features)
|
|
152
148
|
elif function_name == "fit":
|
|
153
|
-
|
|
154
|
-
|
|
149
|
+
model_name = model.__class__.__name__
|
|
150
|
+
np_func_list = ["OneVsRestClassifier", "LabelBinarizer", "TSNE"]
|
|
155
151
|
if labels:
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
152
|
+
# For IsotonicRegression, fit() accepts training target as
|
|
153
|
+
# y: array-like of shape (n_samples,).
|
|
154
|
+
if model_name in ["IsotonicRegression", "LinearSVC"]:
|
|
155
|
+
labels = np.array(labels).reshape(-1)
|
|
156
|
+
if model_name in np_func_list:
|
|
157
|
+
labels = np.array(labels)
|
|
158
|
+
features = np.array(features)
|
|
159
|
+
model.fit(features, labels)
|
|
159
160
|
else:
|
|
160
|
-
|
|
161
|
+
if model_name in np_func_list:
|
|
162
|
+
features = np.array(features)
|
|
163
|
+
model.fit(features)
|
|
161
164
|
|
|
162
165
|
model_str = pickle.dumps(model)
|
|
163
166
|
|
|
@@ -5,33 +5,22 @@ import math
|
|
|
5
5
|
|
|
6
6
|
DELIMITER = '\t'
|
|
7
7
|
|
|
8
|
-
def
|
|
9
|
-
ret_val = value
|
|
10
|
-
try:
|
|
11
|
-
ret_val = float(value.replace(' ', ''))
|
|
12
|
-
except Exception as ex:
|
|
13
|
-
# If the value can't be converted to float, then it is string.
|
|
14
|
-
pass
|
|
15
|
-
return ret_val
|
|
16
|
-
|
|
17
|
-
def get_values_list(values, ignore_none=True):
|
|
8
|
+
def get_values_list(values, types):
|
|
18
9
|
ret_vals = []
|
|
19
|
-
for val in values:
|
|
20
|
-
|
|
21
|
-
# Empty cell value in the database table.
|
|
22
|
-
continue
|
|
23
|
-
ret_vals.append(get_value(val))
|
|
24
|
-
|
|
10
|
+
for i, val in enumerate(values):
|
|
11
|
+
ret_vals.append(convert_to_type(val, types[i]))
|
|
25
12
|
return ret_vals
|
|
26
13
|
|
|
27
14
|
def convert_to_type(val, typee):
|
|
28
15
|
if typee == 'int':
|
|
29
|
-
return int(val)
|
|
16
|
+
return int(val) if val != "" else np.nan
|
|
30
17
|
if typee == 'float':
|
|
31
|
-
|
|
18
|
+
if isinstance(val, str):
|
|
19
|
+
val = val.replace(' ', '')
|
|
20
|
+
return float(val) if val != "" else np.nan
|
|
32
21
|
if typee == 'bool':
|
|
33
|
-
return
|
|
34
|
-
return str(val)
|
|
22
|
+
return eval(val) if val != "" else None
|
|
23
|
+
return str(val) if val != "" else None
|
|
35
24
|
|
|
36
25
|
def splitter(strr, delim=",", convert_to="str"):
|
|
37
26
|
"""
|
|
@@ -48,13 +37,13 @@ if len(sys.argv) != 7:
|
|
|
48
37
|
# 2. No of feature columns.
|
|
49
38
|
# 3. No of class labels.
|
|
50
39
|
# 4. Comma separated indices of partition columns.
|
|
51
|
-
# 5. Comma separated types of the
|
|
40
|
+
# 5. Comma separated types of all the data columns.
|
|
52
41
|
# 6. Model file prefix to generated model file using partition columns.
|
|
53
42
|
# 7. Flag to check the system type. True, means Lake, Enterprise otherwise.
|
|
54
43
|
sys.exit("7 arguments should be passed to this file - file to be run, "\
|
|
55
|
-
"no of feature columns, no of class labels, comma separated indices
|
|
56
|
-
"
|
|
57
|
-
"columns and flag to check lake or enterprise.")
|
|
44
|
+
"no of feature columns, no of class labels, comma separated indices of partition "
|
|
45
|
+
"columns, comma separated types of all columns, model file prefix to generate model "
|
|
46
|
+
"file using partition columns and flag to check lake or enterprise.")
|
|
58
47
|
|
|
59
48
|
is_lake_system = eval(sys.argv[6])
|
|
60
49
|
if not is_lake_system:
|
|
@@ -62,9 +51,11 @@ if not is_lake_system:
|
|
|
62
51
|
n_f_cols = int(sys.argv[1])
|
|
63
52
|
n_c_labels = int(sys.argv[2])
|
|
64
53
|
model_file_prefix = sys.argv[5]
|
|
65
|
-
|
|
54
|
+
data_column_types = splitter(sys.argv[4], delim="--")
|
|
66
55
|
data_partition_column_indices = splitter(sys.argv[3], convert_to="int") # indices are integers.
|
|
67
56
|
|
|
57
|
+
data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
|
|
58
|
+
|
|
68
59
|
model = None
|
|
69
60
|
|
|
70
61
|
# Data Format (n_features, k_labels, one data_partition_columns):
|
|
@@ -85,9 +76,10 @@ while 1:
|
|
|
85
76
|
break
|
|
86
77
|
else:
|
|
87
78
|
values = line.split(DELIMITER)
|
|
88
|
-
|
|
79
|
+
values = get_values_list(values, data_column_types)
|
|
80
|
+
features.append(values[:n_f_cols])
|
|
89
81
|
if n_c_labels > 0:
|
|
90
|
-
labels.append(
|
|
82
|
+
labels.append(values[n_f_cols:(n_f_cols+n_c_labels)])
|
|
91
83
|
if not data_partition_column_values:
|
|
92
84
|
# Partition column values is same for all rows. Hence, only read once.
|
|
93
85
|
for i, val in enumerate(data_partition_column_indices):
|
|
@@ -118,9 +110,9 @@ if not len(features):
|
|
|
118
110
|
|
|
119
111
|
# write code to call fit_predict with features and labels when n_c_labels > 0
|
|
120
112
|
if n_c_labels > 0:
|
|
121
|
-
predictions = model.fit_predict(
|
|
113
|
+
predictions = model.fit_predict(features, labels)
|
|
122
114
|
else:
|
|
123
|
-
predictions = model.fit_predict(
|
|
115
|
+
predictions = model.fit_predict(features)
|
|
124
116
|
|
|
125
117
|
# Export results to to the Databse through standard output
|
|
126
118
|
for i in range(len(predictions)):
|
|
@@ -130,6 +122,6 @@ for i in range(len(predictions)):
|
|
|
130
122
|
else:
|
|
131
123
|
result_list = features[i] + [predictions[i]]
|
|
132
124
|
print(*(data_partition_column_values +
|
|
133
|
-
['' if (val is None or math.isnan(val) or math.isinf(val))
|
|
125
|
+
['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val))))
|
|
134
126
|
else val for val in result_list]),
|
|
135
127
|
sep= DELIMITER)
|
|
@@ -8,23 +8,16 @@ params = json.loads('<params>')
|
|
|
8
8
|
|
|
9
9
|
DELIMITER = '\t'
|
|
10
10
|
|
|
11
|
-
def get_value(value):
|
|
12
|
-
ret_val = value
|
|
13
|
-
try:
|
|
14
|
-
ret_val = float(value.replace(' ', ''))
|
|
15
|
-
except Exception as ex:
|
|
16
|
-
# If the value can't be converted to float, then it is string.
|
|
17
|
-
pass
|
|
18
|
-
return ret_val
|
|
19
|
-
|
|
20
11
|
def convert_to_type(val, typee):
|
|
21
12
|
if typee == 'int':
|
|
22
|
-
return int(val)
|
|
13
|
+
return int(val) if val != "" else np.nan
|
|
23
14
|
if typee == 'float':
|
|
24
|
-
|
|
15
|
+
if isinstance(val, str):
|
|
16
|
+
val = val.replace(' ', '')
|
|
17
|
+
return float(val) if val != "" else np.nan
|
|
25
18
|
if typee == 'bool':
|
|
26
|
-
return
|
|
27
|
-
return str(val)
|
|
19
|
+
return eval(val) if val != "" else None
|
|
20
|
+
return str(val) if val != "" else None
|
|
28
21
|
|
|
29
22
|
def splitter(strr, delim=",", convert_to="str"):
|
|
30
23
|
"""
|
|
@@ -35,21 +28,30 @@ def splitter(strr, delim=",", convert_to="str"):
|
|
|
35
28
|
return [convert_to_type(i, convert_to) for i in strr.split(delim)]
|
|
36
29
|
|
|
37
30
|
# Arguments to the Script.
|
|
38
|
-
if len(sys.argv) !=
|
|
39
|
-
#
|
|
31
|
+
if len(sys.argv) != 6:
|
|
32
|
+
# 5 command line arguments should be passed to this file.
|
|
40
33
|
# 1: file to be run
|
|
41
34
|
# 2. Comma separated indices of partition columns.
|
|
42
|
-
# 3. Comma separated types of the
|
|
35
|
+
# 3. Comma separated types of all the data columns.
|
|
43
36
|
# 4. Data columns information separated by "--" where each data column information is in the form
|
|
44
37
|
# "<arg_name>-<comma separated data indices>-<comma separated data types>".
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
"
|
|
49
|
-
|
|
50
|
-
|
|
38
|
+
# 5. Flag to check the system type. True, means Lake, Enterprise otherwise.
|
|
39
|
+
# 6. Model file prefix for lake system, None otherwise.
|
|
40
|
+
sys.exit("5 arguments command line arguments should be passed: file to be run,"
|
|
41
|
+
" comma separated indices of partition columns, comma separated types of all columns,"
|
|
42
|
+
" data columns information separated by '--' where each data column information is"
|
|
43
|
+
" in the form '<arg_name>-<comma separated data indices>-<comma separated data types>',"
|
|
44
|
+
" flag to check lake or enterprise and model file prefix used only for lake system.")
|
|
45
|
+
|
|
46
|
+
is_lake_system = eval(sys.argv[4])
|
|
47
|
+
if not is_lake_system:
|
|
48
|
+
db = sys.argv[0].split("/")[1]
|
|
49
|
+
else:
|
|
50
|
+
model_file_prefix = sys.argv[5]
|
|
51
51
|
data_partition_column_indices = splitter(sys.argv[1], convert_to="int") # indices are integers.
|
|
52
|
-
|
|
52
|
+
data_column_types = splitter(sys.argv[2], delim="--")
|
|
53
|
+
|
|
54
|
+
data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
|
|
53
55
|
|
|
54
56
|
# Data related arguments information of indices and types.
|
|
55
57
|
data_args_indices_types = OrderedDict()
|
|
@@ -84,6 +86,11 @@ while 1:
|
|
|
84
86
|
data_partition_column_values.append(
|
|
85
87
|
convert_to_type(values[val], typee=data_partition_column_types[i])
|
|
86
88
|
)
|
|
89
|
+
|
|
90
|
+
# Prepare the corresponding model file name and extract model.
|
|
91
|
+
partition_join = "_".join([str(x) for x in data_partition_column_values])
|
|
92
|
+
# Replace '-' with '_' because partition column values can be negative.
|
|
93
|
+
partition_join = partition_join.replace("-", "_")
|
|
87
94
|
|
|
88
95
|
# Prepare data dictionary containing only arguments related to data.
|
|
89
96
|
for arg_name in data_args_values:
|
|
@@ -110,4 +117,15 @@ all_args = {**data_args_values, **params}
|
|
|
110
117
|
module_ = importlib.import_module(module_name)
|
|
111
118
|
sklearn_model = getattr(module_, func_name)(**all_args)
|
|
112
119
|
|
|
113
|
-
|
|
120
|
+
model_str = pickle.dumps(sklearn_model)
|
|
121
|
+
|
|
122
|
+
if is_lake_system:
|
|
123
|
+
model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
|
|
124
|
+
|
|
125
|
+
# Write to file in Vantage, to be used in predict/scoring.
|
|
126
|
+
with open(model_file_path, "wb") as fp:
|
|
127
|
+
fp.write(model_str)
|
|
128
|
+
|
|
129
|
+
model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
|
|
130
|
+
|
|
131
|
+
print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
|