teradataml-20.0.0.1-py3-none-any.whl → teradataml-20.0.0.3-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Potentially problematic release: this version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
Selected hunks from the updated sklearn worker scripts follow. Removed lines whose content the diff view did not render appear as bare "-" markers or a bracketed count.

@@ -28,21 +28,22 @@ def splitter(strr, delim=",", convert_to="str"):
     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
 
 # Arguments to the Script.
-if len(sys.argv) !=
-#
+if len(sys.argv) != 3:
+    # 3 command line arguments should be passed to this file.
     # 1: file to be run
-    # 2.
-    # 3.
-[10 removed lines not rendered in the diff view]
+    # 2. Model file prefix for lake system, None otherwise.
+    # 3. Flag to check the system type. True, means Lake, Enterprise otherwise.
+    sys.exit("3 arguments command line arguments should be passed: file to be run,"
+             " model file prefix used only for lake system and flag to check lake or enterprise.")
+
+is_lake_system = eval(sys.argv[2])
+if not is_lake_system:
+    db = sys.argv[0].split("/")[1]
+else:
+    model_file_prefix = sys.argv[1]
+
+data_partition_column_indices = <partition_cols_indices>
+data_column_types = <types_of_data_cols>
 
 data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
 
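Going by the +34/-16 line totals in the file list, this hunk and the three that follow appear to belong to teradataml/data/scripts/sklearn/sklearn_function.template. The `<partition_cols_indices>` and `<types_of_data_cols>` tokens are placeholders that teradataml fills in before the script is installed in Vantage; a minimal sketch of that kind of substitution, where `fill_template` and the literal values are illustrative rather than the library's actual code:

```python
# Illustrative only: sketches how a placeholder such as <partition_cols_indices>
# could be replaced with a Python literal before the script ships to Vantage.

def fill_template(template: str, partition_cols_indices, types_of_data_cols) -> str:
    """Substitute the two placeholders with repr()'d Python literals."""
    return (template
            .replace("<partition_cols_indices>", repr(partition_cols_indices))
            .replace("<types_of_data_cols>", repr(types_of_data_cols)))

template = ("data_partition_column_indices = <partition_cols_indices>\n"
            "data_column_types = <types_of_data_cols>\n")

print(fill_template(template, [2], ["float", "float", "int"]))
# data_partition_column_indices = [2]
# data_column_types = ['float', 'float', 'int']
```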
@@ -52,7 +53,8 @@ data_args_indices_types = OrderedDict()
 # Data related arguments values - prepare dictionary and populate data later.
 data_args_values = {}
 
-
+data_args_info_str = <data_args_info_str>
+for data_arg in data_args_info_str.split("--"):
     arg_name, indices, types = data_arg.split("-")
     indices = splitter(indices, convert_to="int")
     types = splitter(types)
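Judging from the split calls, the `<data_args_info_str>` placeholder packs each data-related argument as `name-indices-types`, with `--` between arguments and commas inside the index and type lists. A small sketch of the decoding loop, using a made-up packed string:

```python
# Hypothetical packed string in the format the template's loop expects:
# <arg_name>-<comma separated indices>-<comma separated types>, joined by "--".
data_args_info_str = "X-0,1-float,float--y-2-int"

for data_arg in data_args_info_str.split("--"):
    arg_name, indices, types = data_arg.split("-")
    indices = [int(i) for i in indices.split(",")]  # splitter(indices, convert_to="int")
    types = types.split(",")                        # splitter(types)
    print(arg_name, indices, types)
# X [0, 1] ['float', 'float']
# y [2] ['int']
```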
@@ -79,6 +81,11 @@ while 1:
                data_partition_column_values.append(
                    convert_to_type(values[val], typee=data_partition_column_types[i])
                )
+
+            # Prepare the corresponding model file name and extract model.
+            partition_join = "_".join([str(x) for x in data_partition_column_values])
+            # Replace '-' with '_' as '-' because partition_columns can be negative.
+            partition_join = partition_join.replace("-", "_")
 
         # Prepare data dictionary containing only arguments related to data.
         for arg_name in data_args_values:
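Each parallel worker processes a single data partition, so the partition column values identify which model file belongs to the rows at hand. A sketch of the suffix derivation, including the `-` to `_` rewrite that keeps negative partition values from injecting hyphens into file names:

```python
def partition_suffix(values):
    """Join partition column values into a file-name-safe suffix."""
    joined = "_".join(str(v) for v in values)
    # '-' can appear when a partition value is negative; normalize it to '_'.
    return joined.replace("-", "_")

print(partition_suffix([3, "north"]))  # 3_north
print(partition_suffix([-1, 2.5]))     # _1_2.5
```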
@@ -105,4 +112,15 @@ all_args = {**data_args_values, **params}
 module_ = importlib.import_module(module_name)
 sklearn_model = getattr(module_, func_name)(**all_args)
 
-
+model_str = pickle.dumps(sklearn_model)
+
+if is_lake_system:
+    model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
+
+    # Write to file in Vantage, to be used in predict/scoring.
+    with open(model_file_path, "wb") as fp:
+        fp.write(model_str)
+
+model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
+
+print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
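On an Enterprise system the fitted model travels back through the script's delimited stdout, so the pickle bytes are base64-encoded into a single printable token; on Lake the pickle is written to a file under /tmp instead. A self-contained sketch of the Enterprise round trip, with a plain dict standing in for the fitted estimator:

```python
import base64
import pickle

model = {"coef_": [0.5, -1.2], "intercept_": 0.1}  # stand-in for a fitted estimator

# Fit side: pickle, then base64 so the bytes survive being printed
# as one column of a tab-delimited output row.
model_str = pickle.dumps(model)
row = "\t".join(["part_1", base64.b64encode(model_str).decode("ascii")])

# Client side: split the row, decode, unpickle.
encoded = row.split("\t")[1]
restored = pickle.loads(base64.b64decode(encoded))
assert restored == model
```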
@@ -3,146 +3,164 @@ import math
 import sys
 import numpy as np
 import base64
+from contextlib import contextmanager
+import os
 
 DELIMITER = '\t'
 
-
-
-    for i, val in enumerate(values):
-        ret_vals.append(convert_to_type(val, types[i]))
-    return ret_vals
-
-def convert_to_type(val, typee):
-    if typee == 'int':
-        return int(val) if val != "" else np.nan
-    if typee == 'float':
-        if isinstance(val, str):
-            val = val.replace(' ', '')
-        return float(val) if val != "" else np.nan
-    if typee == 'bool':
-        return eval(val) if val != "" else None
-    return str(val) if val != "" else None
-
-def splitter(strr, delim=",", convert_to="str"):
+@contextmanager
+def suppress_stderr():
     """
-
+    Function to suppress the warnings(lake systems treats warnings as errors).
     """
-[25 removed lines not rendered in the diff view]
-if
-[14 removed lines not rendered in the diff view]
-#
-#
-#
-#
-[8 removed lines not rendered in the diff view]
+    with open(os.devnull, "w") as devnull:
+        old_stderr = sys.stderr
+        sys.stderr = devnull
+        try:
+            yield
+        finally:
+            sys.stderr = old_stderr
+
+## On Lake system warnings raised by script are treated as a errors.
+## Hence, to suppress it putting the under suppress_stderr().
+with suppress_stderr():
+    def get_values_list(values, types):
+        ret_vals = []
+        for i, val in enumerate(values):
+            ret_vals.append(convert_to_type(val, types[i]))
+        return ret_vals
+
+    def convert_to_type(val, typee):
+        if typee == 'int':
+            return int(val) if val != "" else np.nan
+        if typee == 'float':
+            if isinstance(val, str):
+                val = val.replace(' ', '')
+            return float(val) if val != "" else np.nan
+        if typee == 'bool':
+            return eval(val) if val != "" else None
+        return str(val) if val != "" else None
+
+    def splitter(strr, delim=",", convert_to="str"):
+        """
+        Split the string based on delimiter and convert to the type specified.
+        """
+        if strr == "None":
+            return []
+        return [convert_to_type(i, convert_to) for i in strr.split(delim)]
+
+
+    # Arguments to the Script
+    if len(sys.argv) != 9:
+        # 9 arguments command line arguments should be passed to this file.
+        # 1: file to be run
+        # 2. function name
+        # 3. No of feature columns.
+        # 4. No of class labels.
+        # 5. No of group columns.
+        # 6. Comma separated indices of partition columns.
+        # 7. Comma separated types of all the data columns.
+        # 8. Model file prefix to generated model file using partition columns.
+        # 9. Flag to check the system type. True, means Lake, Enterprise otherwise.
+        sys.exit("9 arguments command line arguments should be passed: file to be run,"
+                 " function name, no of feature columns, no of class labels, no of group columns,"
+                 " comma separated indices of partition columns, comma separated types of all columns,"
+                 " model file prefix to generated model file using partition columns and flag to check"
+                 " lake or enterprise.")
+
+
+    is_lake_system = eval(sys.argv[8])
+    if not is_lake_system:
+        db = sys.argv[0].split("/")[1]
+    function_name = sys.argv[1]
+    n_f_cols = int(sys.argv[2])
+    n_c_labels = int(sys.argv[3])
+    n_g_cols = int(sys.argv[4])
+    data_column_types = splitter(sys.argv[6], delim="--")
+    data_partition_column_indices = splitter(sys.argv[5], convert_to="int")  # indices are integers.
+    model_file_prefix = sys.argv[7]
+
+    data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
+
+    model = None
+    data_partition_column_values = []
+
+    # Data Format (n_features, k_labels, one data_partition_column):
+    # feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
+    # data_partition_columnn.
+    # labels are optional.
+
+    features = []
+    labels = []
+    groups = []
+    while 1:
+        try:
+            line = input()
+            if line == '':  # Exit if user provides blank line
+                break
+            else:
+                values = line.split(DELIMITER)
+                values = get_values_list(values, data_column_types)
+                if not data_partition_column_values:
+                    # Partition column values is same for all rows. Hence, only read once.
+                    for i, val in enumerate(data_partition_column_indices):
+                        data_partition_column_values.append(
+                            convert_to_type(values[val], typee=data_partition_column_types[i])
+                        )
+
+                    # Prepare the corresponding model file name and extract model.
+                    partition_join = "_".join([str(x) for x in data_partition_column_values])
+                    # Replace '-' with '_' as '-' because partition_columns can be negative.
+                    partition_join = partition_join.replace("-", "_")
+
+                    model_file_path = f"{model_file_prefix}_{partition_join}" \
+                        if is_lake_system else \
+                        f"./{db}/{model_file_prefix}_{partition_join}"
+
+                    with open(model_file_path, "rb") as fp:
+                        model = pickle.loads(fp.read())
+
+                    if not model:
+                        sys.exit("Model file is not installed in Vantage.")
+
+                start = 0
+                if n_f_cols > 0:
+                    features.append(values[:n_f_cols])
+                    start = start + n_f_cols
+                if n_c_labels > 0:
+                    labels.append(values[start:(start+n_c_labels)])
+                    start = start + n_c_labels
+                if n_g_cols > 0:
+                    groups.append(values[start:(start+n_g_cols)])
+
+        except EOFError:  # Exit if reached EOF or CTRL-D
             break
-[29 removed lines not rendered in the diff view]
-            if n_c_labels > 0:
-                labels.append(values[start:(start+n_c_labels)])
-                start = start + n_c_labels
-            if n_g_cols > 0:
-                groups.append(values[start:(start+n_g_cols)])
-
-    except EOFError:  # Exit if reached EOF or CTRL-D
-        break
-
-if len(features) == 0:
-    sys.exit(0)
-
-features = np.array(features) if len(features) > 0 else None
-labels = np.array(labels).flatten() if len(labels) > 0 else None
-groups = np.array(groups).flatten() if len(groups) > 0 else None
-
-if function_name == "split":
-    # Printing both train and test data instead of just indices unlike sklearn.
-    # Generator is created based on split_id and type of split (train/test) in client.
-    split_id = 1
-    for train_idx, test_idx in model.split(features, labels, groups):
-        X_train, X_test = features[train_idx], features[test_idx]
-        y_train, y_test = labels[train_idx], labels[test_idx]
-        for X, y in zip(X_train, y_train):
-            print(*(data_partition_column_values + [split_id, "train"] +
-                    ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
-                     for val in X] + [y]
-                    ), sep=DELIMITER)
-        for X, y in zip(X_test, y_test):
-            print(*(data_partition_column_values + [split_id, "test"] +
-                    ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
-                     for val in X] + [y]
-                    ), sep=DELIMITER)
-        split_id += 1
-else:
-    val = getattr(model, function_name)(features, labels, groups)
-    print(*(data_partition_column_values + [val]), sep=DELIMITER)
+
+    if len(features) == 0:
+        sys.exit(0)
+
+    features = np.array(features) if len(features) > 0 else None
+    labels = np.array(labels).flatten() if len(labels) > 0 else None
+    groups = np.array(groups).flatten() if len(groups) > 0 else None
+
+    if function_name == "split":
+        # Printing both train and test data instead of just indices unlike sklearn.
+        # Generator is created based on split_id and type of split (train/test) in client.
+        split_id = 1
+        for train_idx, test_idx in model.split(features, labels, groups):
+            X_train, X_test = features[train_idx], features[test_idx]
+            y_train, y_test = labels[train_idx], labels[test_idx]
+            for X, y in zip(X_train, y_train):
+                print(*(data_partition_column_values + [split_id, "train"] +
+                        ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
+                         for val in X] + [y]
+                        ), sep=DELIMITER)
+            for X, y in zip(X_test, y_test):
+                print(*(data_partition_column_values + [split_id, "test"] +
+                        ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
+                         for val in X] + [y]
+                        ), sep=DELIMITER)
+            split_id += 1
+    else:
+        val = getattr(model, function_name)(features, labels, groups)
+        print(*(data_partition_column_values + [val]), sep=DELIMITER)
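This rewrite, whose -137/+155 totals match sklearn_model_selection_split.py in the file list, wraps the whole driver in `suppress_stderr`, which temporarily points `sys.stderr` at `os.devnull` so that library warnings are not promoted to errors on Lake. A standalone sketch of the pattern; the standard library's `contextlib.redirect_stderr` achieves the same effect:

```python
import os
import sys
from contextlib import contextmanager

@contextmanager
def suppress_stderr():
    """Temporarily send anything written to stderr to os.devnull."""
    with open(os.devnull, "w") as devnull:
        old_stderr = sys.stderr
        sys.stderr = devnull
        try:
            yield
        finally:
            sys.stderr = old_stderr  # always restore, even on error

with suppress_stderr():
    print("this vanishes", file=sys.stderr)
print("stderr works again", file=sys.stderr)
```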
@@ -116,7 +116,7 @@ while 1:
 
         f_ = values[:n_f_cols]
         if f_:
-            output = getattr(model, func_name)(
+            output = getattr(model, func_name)([f_], **arguments)
         else:
             output = getattr(model, func_name)(**arguments)
         result_list = f_
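This one-line change, consistent with the +1/-1 edit to sklearn_neighbors.py, wraps the lone feature row in `[f_]` because scikit-learn estimator methods expect a 2-D `(n_samples, n_features)` input; a bare 1-D row raises a ValueError in current scikit-learn. A sketch, assuming scikit-learn is installed:

```python
# sklearn methods such as predict() want a 2-D (n_samples, n_features)
# array, so a single row f_ must be passed as [f_].
from sklearn.neighbors import KNeighborsClassifier

X = [[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]
y = [0, 1, 1]
model = KNeighborsClassifier(n_neighbors=1).fit(X, y)

f_ = [0.9, 1.1]             # one incoming row of features
print(model.predict([f_]))  # [1] -- 2-D input, one sample
# model.predict(f_) would raise a ValueError (expected 2-D array, got 1-D).
```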
@@ -110,10 +110,19 @@ while 1:
 if len(features) == 0:
     sys.exit(0)
 
+
+model_name = model.__class__.__name__
+np_func_list = ["MultiOutputClassifier", "GaussianMixture"]
+
+if model_name in np_func_list:
+    features = np.array(features)
+
 if labels:
-
+    if model_name in np_func_list:
+        labels = np.array(labels)
+    val = getattr(model, func_name)(features, labels)
 else:
-    val = getattr(model, func_name)(
+    val = getattr(model, func_name)(features)
 
 result_val = ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val]
-print(*(data_partition_column_values + result_val), sep=DELIMITER)
+print(*(data_partition_column_values + result_val), sep=DELIMITER)
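This last hunk, whose -3/+12 totals match sklearn_score.py in the file list, also relies on the `result_val` guard that keeps `None`, `NaN`, and `Inf` from corrupting the delimited output: each becomes an empty string, and the `isinstance(val, str)` check keeps strings away from `math.isnan`/`math.isinf`, which only accept numbers. The same guard, pulled out as a helper for illustration:

```python
import math

def sanitize(val):
    """Mirror the template's result_val guard: map None/NaN/Inf to ''."""
    if val is None:
        return ''
    if not isinstance(val, str) and (math.isnan(val) or math.isinf(val)):
        return ''
    return val

print([sanitize(v) for v in [1.5, float("nan"), float("inf"), None, "ok"]])
# [1.5, '', '', '', 'ok']
```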