teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +112 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +224 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +6 -4
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +245 -306
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +313 -82
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +109 -146
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/constants.py +37 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +151 -120
- teradataml/common/messagecodes.py +4 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +97 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +72 -2
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
- teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +474 -41
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +658 -20
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +322 -16
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
- teradataml/options/__init__.py +3 -1
- teradataml/options/configure.py +14 -2
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +10 -6
- teradataml/scriptmgmt/lls_utils.py +3 -2
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +1 -1
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -138,22 +138,29 @@ if not len(features):
|
|
|
138
138
|
# Fit/partial_fit the model to the data.
|
|
139
139
|
if function_name == "partial_fit":
|
|
140
140
|
if labels and classes:
|
|
141
|
-
model.partial_fit(
|
|
141
|
+
model.partial_fit(features, labels, classes=classes)
|
|
142
142
|
elif labels:
|
|
143
|
-
model.partial_fit(
|
|
143
|
+
model.partial_fit(features, labels)
|
|
144
144
|
elif classes:
|
|
145
|
-
model.partial_fit(
|
|
145
|
+
model.partial_fit(features, classes=classes)
|
|
146
146
|
else:
|
|
147
|
-
model.partial_fit(
|
|
147
|
+
model.partial_fit(features)
|
|
148
148
|
elif function_name == "fit":
|
|
149
|
-
|
|
150
|
-
|
|
149
|
+
model_name = model.__class__.__name__
|
|
150
|
+
np_func_list = ["OneVsRestClassifier", "LabelBinarizer", "TSNE"]
|
|
151
151
|
if labels:
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
152
|
+
# For IsotonicRegression, fit() accepts training target as
|
|
153
|
+
# y: array-like of shape (n_samples,).
|
|
154
|
+
if model_name in ["IsotonicRegression", "LinearSVC"]:
|
|
155
|
+
labels = np.array(labels).reshape(-1)
|
|
156
|
+
if model_name in np_func_list:
|
|
157
|
+
labels = np.array(labels)
|
|
158
|
+
features = np.array(features)
|
|
159
|
+
model.fit(features, labels)
|
|
155
160
|
else:
|
|
156
|
-
|
|
161
|
+
if model_name in np_func_list:
|
|
162
|
+
features = np.array(features)
|
|
163
|
+
model.fit(features)
|
|
157
164
|
|
|
158
165
|
model_str = pickle.dumps(model)
|
|
159
166
|
|
|
@@ -110,9 +110,9 @@ if not len(features):
|
|
|
110
110
|
|
|
111
111
|
# write code to call fit_predict with features and labels when n_c_labels > 0
|
|
112
112
|
if n_c_labels > 0:
|
|
113
|
-
predictions = model.fit_predict(
|
|
113
|
+
predictions = model.fit_predict(features, labels)
|
|
114
114
|
else:
|
|
115
|
-
predictions = model.fit_predict(
|
|
115
|
+
predictions = model.fit_predict(features)
|
|
116
116
|
|
|
117
117
|
# Export results to the Database through standard output
|
|
118
118
|
for i in range(len(predictions)):
|
|
@@ -28,19 +28,26 @@ def splitter(strr, delim=",", convert_to="str"):
|
|
|
28
28
|
return [convert_to_type(i, convert_to) for i in strr.split(delim)]
|
|
29
29
|
|
|
30
30
|
# Arguments to the Script.
|
|
31
|
-
if len(sys.argv) !=
|
|
32
|
-
#
|
|
31
|
+
if len(sys.argv) != 6:
|
|
32
|
+
# 6 command line arguments (including the file to be run) should be passed to this file.
|
|
33
33
|
# 1: file to be run
|
|
34
34
|
# 2. Comma separated indices of partition columns.
|
|
35
35
|
# 3. Comma separated types of all the data columns.
|
|
36
36
|
# 4. Data columns information separated by "--" where each data column information is in the form
|
|
37
37
|
# "<arg_name>-<comma separated data indices>-<comma separated data types>".
|
|
38
|
-
|
|
38
|
+
# 5. Flag to check the system type. True means Lake, Enterprise otherwise.
|
|
39
|
+
# 6. Model file prefix for lake system, None otherwise.
|
|
40
|
+
sys.exit("5 arguments command line arguments should be passed: file to be run,"
|
|
39
41
|
" comma separated indices of partition columns, comma separated types of all columns,"
|
|
40
42
|
" data columns information separated by '--' where each data column information is"
|
|
41
|
-
" in the form '<arg_name>-<comma separated data indices>-<comma separated data types>'
|
|
42
|
-
|
|
43
|
-
|
|
43
|
+
" in the form '<arg_name>-<comma separated data indices>-<comma separated data types>',"
|
|
44
|
+
" flag to check lake or enterprise and model file prefix used only for lake system.")
|
|
45
|
+
|
|
46
|
+
is_lake_system = eval(sys.argv[4])
|
|
47
|
+
if not is_lake_system:
|
|
48
|
+
db = sys.argv[0].split("/")[1]
|
|
49
|
+
else:
|
|
50
|
+
model_file_prefix = sys.argv[5]
|
|
44
51
|
data_partition_column_indices = splitter(sys.argv[1], convert_to="int") # indices are integers.
|
|
45
52
|
data_column_types = splitter(sys.argv[2], delim="--")
|
|
46
53
|
|
|
@@ -79,6 +86,11 @@ while 1:
|
|
|
79
86
|
data_partition_column_values.append(
|
|
80
87
|
convert_to_type(values[val], typee=data_partition_column_types[i])
|
|
81
88
|
)
|
|
89
|
+
|
|
90
|
+
# Prepare the corresponding model file name and extract model.
|
|
91
|
+
partition_join = "_".join([str(x) for x in data_partition_column_values])
|
|
92
|
+
# Replace '-' with '_' because partition column values can be negative.
|
|
93
|
+
partition_join = partition_join.replace("-", "_")
|
|
82
94
|
|
|
83
95
|
# Prepare data dictionary containing only arguments related to data.
|
|
84
96
|
for arg_name in data_args_values:
|
|
@@ -105,4 +117,15 @@ all_args = {**data_args_values, **params}
|
|
|
105
117
|
module_ = importlib.import_module(module_name)
|
|
106
118
|
sklearn_model = getattr(module_, func_name)(**all_args)
|
|
107
119
|
|
|
108
|
-
|
|
120
|
+
model_str = pickle.dumps(sklearn_model)
|
|
121
|
+
|
|
122
|
+
if is_lake_system:
|
|
123
|
+
model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
|
|
124
|
+
|
|
125
|
+
# Write to file in Vantage, to be used in predict/scoring.
|
|
126
|
+
with open(model_file_path, "wb") as fp:
|
|
127
|
+
fp.write(model_str)
|
|
128
|
+
|
|
129
|
+
model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
|
|
130
|
+
|
|
131
|
+
print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
|
|
@@ -116,7 +116,7 @@ while 1:
|
|
|
116
116
|
|
|
117
117
|
f_ = values[:n_f_cols]
|
|
118
118
|
if f_:
|
|
119
|
-
output = getattr(model, func_name)(
|
|
119
|
+
output = getattr(model, func_name)([f_], **arguments)
|
|
120
120
|
else:
|
|
121
121
|
output = getattr(model, func_name)(**arguments)
|
|
122
122
|
result_list = f_
|
|
@@ -110,10 +110,19 @@ while 1:
|
|
|
110
110
|
if len(features) == 0:
|
|
111
111
|
sys.exit(0)
|
|
112
112
|
|
|
113
|
+
|
|
114
|
+
model_name = model.__class__.__name__
|
|
115
|
+
np_func_list = ["MultiOutputClassifier", "GaussianMixture"]
|
|
116
|
+
|
|
117
|
+
if model_name in np_func_list:
|
|
118
|
+
features = np.array(features)
|
|
119
|
+
|
|
113
120
|
if labels:
|
|
114
|
-
|
|
121
|
+
if model_name in np_func_list:
|
|
122
|
+
labels = np.array(labels)
|
|
123
|
+
val = getattr(model, func_name)(features, labels)
|
|
115
124
|
else:
|
|
116
|
-
val = getattr(model, func_name)(
|
|
125
|
+
val = getattr(model, func_name)(features)
|
|
117
126
|
|
|
118
127
|
result_val = ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val]
|
|
119
|
-
print(*(data_partition_column_values + result_val), sep=DELIMITER)
|
|
128
|
+
print(*(data_partition_column_values + result_val), sep=DELIMITER)
|
|
@@ -92,11 +92,14 @@ data_partition_column_types = [data_column_types[idx] for idx in data_partition_
|
|
|
92
92
|
model = None
|
|
93
93
|
data_partition_column_values = []
|
|
94
94
|
|
|
95
|
+
missing_indicator_input = []
|
|
96
|
+
|
|
95
97
|
# Data Format:
|
|
96
98
|
# feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
|
|
97
99
|
# data_partition_columnn.
|
|
98
100
|
# label is optional (it is present when label_exists is not "None")
|
|
99
101
|
|
|
102
|
+
model_name = ""
|
|
100
103
|
while 1:
|
|
101
104
|
try:
|
|
102
105
|
line = input()
|
|
@@ -128,9 +131,33 @@ while 1:
|
|
|
128
131
|
sys.exit("Model file is not installed in Vantage.")
|
|
129
132
|
|
|
130
133
|
f_ = values[:n_f_cols]
|
|
134
|
+
|
|
135
|
+
model_name = model.__class__.__name__
|
|
136
|
+
np_func_list = ["ClassifierChain", "EllipticEnvelope", "MinCovDet",
|
|
137
|
+
"FeatureAgglomeration", "LabelBinarizer", "MultiLabelBinarizer"]
|
|
138
|
+
|
|
139
|
+
# MissingIndicator requires processing the entire dataset simultaneously,
|
|
140
|
+
# rather than on a row-by-row basis.
|
|
141
|
+
|
|
142
|
+
# Error raised during row-by-row processing -
|
|
143
|
+
# "ValueError: MissingIndicator does not support data with dtype <U13.
|
|
144
|
+
# Please provide either a numeric array (with a floating point or
|
|
145
|
+
# integer dtype) or categorical data represented ei...
|
|
146
|
+
if model_name == "MissingIndicator" and func_name == "transform":
|
|
147
|
+
missing_indicator_input.append(f_)
|
|
148
|
+
continue
|
|
149
|
+
|
|
150
|
+
f__ = np.array([f_]) if model_name in np_func_list or \
|
|
151
|
+
(model_name == "SimpleImputer" and func_name == "inverse_transform")\
|
|
152
|
+
else [f_]
|
|
153
|
+
|
|
131
154
|
if n_c_labels > 0:
|
|
132
155
|
# Labels are present in last column.
|
|
133
156
|
l_ = values[n_f_cols:n_f_cols+n_c_labels]
|
|
157
|
+
|
|
158
|
+
l__ = np.array([l_]) if model_name in np_func_list or \
|
|
159
|
+
(model_name == "SimpleImputer" and func_name == "inverse_transform")\
|
|
160
|
+
else [l_]
|
|
134
161
|
# predict() now takes 'y' also for it to return the labels from script. Skipping 'y'
|
|
135
162
|
# in function call. Generally, 'y' is passed to return y along with actual output.
|
|
136
163
|
try:
|
|
@@ -138,17 +165,17 @@ while 1:
|
|
|
138
165
|
# Used 'in' in the if condition, as model.__module__ returns
|
|
139
166
|
# 'sklearn.cross_decomposition._pls'.
|
|
140
167
|
if "cross_decomposition" in model.__module__:
|
|
141
|
-
trans_values = getattr(model, func_name)(X=
|
|
168
|
+
trans_values = getattr(model, func_name)(X=f__, Y=l__)
|
|
142
169
|
else:
|
|
143
|
-
trans_values = getattr(model, func_name)(X=
|
|
170
|
+
trans_values = getattr(model, func_name)(X=f__, y=l__)
|
|
144
171
|
|
|
145
172
|
except TypeError as ex:
|
|
146
173
|
# Function which does not accept 'y' like predict_proba() raises error like
|
|
147
174
|
# "TypeError: predict_proba() takes 2 positional arguments but 3 were given".
|
|
148
|
-
trans_values = getattr(model, func_name)(
|
|
175
|
+
trans_values = getattr(model, func_name)(f__)
|
|
149
176
|
else:
|
|
150
177
|
# If class labels do not exist in data, don't read labels, read just features.
|
|
151
|
-
trans_values = getattr(model, func_name)(
|
|
178
|
+
trans_values = getattr(model, func_name)(f__)
|
|
152
179
|
|
|
153
180
|
result_list = f_
|
|
154
181
|
if n_c_labels > 0 and func_name in ["predict", "decision_function"]:
|
|
@@ -169,3 +196,27 @@ while 1:
|
|
|
169
196
|
|
|
170
197
|
except EOFError: # Exit if reached EOF or CTRL-D
|
|
171
198
|
break
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# MissingIndicator needs processing of all the dataset at the same time, instead of row by row.
|
|
202
|
+
# Hence, handling it outside of the while loop
|
|
203
|
+
if model_name == "MissingIndicator" and func_name == "transform":
|
|
204
|
+
m_out = model.transform(missing_indicator_input)
|
|
205
|
+
|
|
206
|
+
for j, vals in enumerate(missing_indicator_input):
|
|
207
|
+
|
|
208
|
+
m_out_list = get_output_data(trans_values=m_out[j], func_name=func_name,
|
|
209
|
+
model_obj=model, n_c_labels=n_c_labels)
|
|
210
|
+
|
|
211
|
+
result_list = missing_indicator_input[j] + m_out_list
|
|
212
|
+
|
|
213
|
+
for i, val in enumerate(result_list):
|
|
214
|
+
if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
|
|
215
|
+
result_list[i] = ""
|
|
216
|
+
# MissingIndicator returns boolean values. Convert them to 0/1.
|
|
217
|
+
elif val == False:
|
|
218
|
+
result_list[i] = 0
|
|
219
|
+
elif val == True:
|
|
220
|
+
result_list[i] = 1
|
|
221
|
+
|
|
222
|
+
print(*(data_partition_column_values + result_list), sep=DELIMITER)
|
|
@@ -1348,6 +1348,25 @@
|
|
|
1348
1348
|
"radio":"FLOAT",
|
|
1349
1349
|
"newspaper":"FLOAT",
|
|
1350
1350
|
"sales":"FLOAT"
|
|
1351
|
+
},
|
|
1352
|
+
"timestamp_data":{
|
|
1353
|
+
"id": "INTEGER",
|
|
1354
|
+
"timestamp_col": "VARCHAR(50)",
|
|
1355
|
+
"timestamp_col1": "BIGINT",
|
|
1356
|
+
"format_col": "VARCHAR(50)",
|
|
1357
|
+
"timezone_col": "VARCHAR(50)"
|
|
1358
|
+
},
|
|
1359
|
+
"interval_data":{
|
|
1360
|
+
"id": "INTEGER",
|
|
1361
|
+
"int_col": "BIGINT",
|
|
1362
|
+
"value_col": "VARCHAR(30)",
|
|
1363
|
+
"value_col1": "VARCHAR(30)",
|
|
1364
|
+
"str_col1": "VARCHAR(30)",
|
|
1365
|
+
"str_col2": "VARCHAR(30)"
|
|
1366
|
+
},
|
|
1367
|
+
"url_data": {
|
|
1368
|
+
"id": "INTEGER",
|
|
1369
|
+
"urls": "VARCHAR(60)",
|
|
1370
|
+
"part": "VARCHAR(20)"
|
|
1351
1371
|
}
|
|
1352
|
-
|
|
1353
1372
|
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
id,timestamp_col,timestamp_col1,format_col,timezone_col
|
|
2
|
+
0,"2015-01-08 00:00:12.2",123456,"YYYY-MM-DD HH24:MI:SS.FF6","GMT"
|
|
3
|
+
1,"2015-01-08 13:00",878986,"YYYY-MM-DD HH24:MI","America Pacific"
|
|
4
|
+
2,"2015-01-08 00:00:12.2+10:00",45678910234,"YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM","GMT+10"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
passenger,AttributeName,AttributeValue,survived
|
|
2
|
+
61,pclass,3,0
|
|
3
|
+
1000,pclass,3,1
|
|
4
|
+
40,pclass,3,1
|
|
5
|
+
21,pclass,2,0
|
|
6
|
+
61,gender,male,0
|
|
7
|
+
1000,gender,,1
|
|
8
|
+
40,gender,female,1
|
|
9
|
+
21,gender,male,0
|
|
10
|
+
2,pclass,1,1
|
|
11
|
+
16,pclass,2,1
|
|
12
|
+
7,pclass,1,0
|
|
13
|
+
2,gender,female,1
|
|
14
|
+
16,gender,female,1
|
|
15
|
+
7,gender,male,0
|
|
16
|
+
10,pclass,2,1
|
|
17
|
+
4,pclass,1,1
|
|
18
|
+
10,gender,female,1
|
|
19
|
+
4,gender,female,1
|
teradataml/data/uaf_example.json
CHANGED
|
@@ -471,5 +471,59 @@
|
|
|
471
471
|
"CONF_OFF_v": "FLOAT",
|
|
472
472
|
"CONF_LOW_v": "FLOAT",
|
|
473
473
|
"CONF_HI_v": "FLOAT"
|
|
474
|
+
},
|
|
475
|
+
"dwt_dataTable":{
|
|
476
|
+
"id": "INTEGER",
|
|
477
|
+
"rowi": "INTEGER",
|
|
478
|
+
"v": "FLOAT"
|
|
479
|
+
},
|
|
480
|
+
"dwt_filterTable":{
|
|
481
|
+
"id": "INTEGER",
|
|
482
|
+
"seq": "INTEGER",
|
|
483
|
+
"lo": "FLOAT",
|
|
484
|
+
"hi": "FLOAT"
|
|
485
|
+
},
|
|
486
|
+
"idwt_dataTable":{
|
|
487
|
+
"id": "INTEGER",
|
|
488
|
+
"rowi": "INTEGER",
|
|
489
|
+
"approx": "FLOAT",
|
|
490
|
+
"detail": "FLOAT"
|
|
491
|
+
},
|
|
492
|
+
"idwt_filterTable":{
|
|
493
|
+
"id": "INTEGER",
|
|
494
|
+
"seq": "INTEGER",
|
|
495
|
+
"lo": "FLOAT",
|
|
496
|
+
"hi": "FLOAT"
|
|
497
|
+
},
|
|
498
|
+
"dwt2d_dataTable":{
|
|
499
|
+
"id": "INTEGER",
|
|
500
|
+
"x": "INTEGER",
|
|
501
|
+
"y": "INTEGER",
|
|
502
|
+
"v": "FLOAT"
|
|
503
|
+
},
|
|
504
|
+
"idwt2d_dataTable":{
|
|
505
|
+
"id": "INTEGER",
|
|
506
|
+
"x": "INTEGER",
|
|
507
|
+
"y": "INTEGER",
|
|
508
|
+
"v": "FLOAT"
|
|
509
|
+
},
|
|
510
|
+
"covid_confirm_sd":{
|
|
511
|
+
"city": "VARCHAR(15)",
|
|
512
|
+
"row_axis": "INTEGER",
|
|
513
|
+
"cnumber": "INTEGER"
|
|
514
|
+
},
|
|
515
|
+
"real_values":{
|
|
516
|
+
"TD_TIMECODE": "TIMESTAMP(0)",
|
|
517
|
+
"id": "INTEGER",
|
|
518
|
+
"val": "FLOAT",
|
|
519
|
+
"<PTI_CLAUSE>": "(TIMESTAMP(0), DATE '2020-01-01', HOURS(1), COLUMNS(id), nonsequenced)"
|
|
520
|
+
},
|
|
521
|
+
"windowdfft":{
|
|
522
|
+
"id": "INTEGER",
|
|
523
|
+
"row_i": "INTEGER",
|
|
524
|
+
"v1": "FLOAT",
|
|
525
|
+
"v2": "FLOAT",
|
|
526
|
+
"v3": "FLOAT",
|
|
527
|
+
"v4": "FLOAT"
|
|
474
528
|
}
|
|
475
|
-
}
|
|
529
|
+
}
|
|
@@ -6,5 +6,20 @@
|
|
|
6
6
|
"temp": "integer",
|
|
7
7
|
"pressure": "real",
|
|
8
8
|
"dewpoint": "varchar(30)"
|
|
9
|
+
},
|
|
10
|
+
"titanic_dataset_unpivoted":{
|
|
11
|
+
"passenger": "integer",
|
|
12
|
+
"AttributeName": "varchar(30)",
|
|
13
|
+
"AttributeValue": "varchar(30)",
|
|
14
|
+
"survived": "integer"
|
|
15
|
+
},
|
|
16
|
+
"star_pivot":{
|
|
17
|
+
"country": "varchar(30)",
|
|
18
|
+
"state": "varchar(30)",
|
|
19
|
+
"yr": "integer",
|
|
20
|
+
"qtr": "varchar(30)",
|
|
21
|
+
"sales": "integer",
|
|
22
|
+
"cogs": "integer",
|
|
23
|
+
"rating": "varchar(30)"
|
|
9
24
|
}
|
|
10
25
|
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"id","urls","part"
|
|
2
|
+
0,"http://example.com:8080/path","FILE"
|
|
3
|
+
1,"ftp://example.net:21/path","PATH"
|
|
4
|
+
2,"https://example.net/path4/path5/path6?query4=value4#fragment3","REF"
|
|
5
|
+
3,"https://www.facebook.com","HOST"
|
|
6
|
+
4,"https://teracloud-pod-services-pod-account-service.dummyvalue.production.pods.teracloud.ninja/v1/accounts/acc-dummyvalue/user-environment-service/api/v1/","QUERY"
|
|
7
|
+
5,"http://pg.example.ml/path150#fragment90","AUTHORITY"
|
|
8
|
+
6,"smtp://user:password@smtp.example.com:21/file.txt","USERINFO"
|
|
9
|
+
7,"https://www.google.com","PROTOCOL"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
id,row_i,v1,v2,v3,v4
|
|
2
|
+
3,1,0.0,1.4,1.0,1.0
|
|
3
|
+
3,2,1.0,2.4,2.0,2.0
|
|
4
|
+
3,3,2.0,3.4,3.0,3.0
|
|
5
|
+
3,4,3.0,4.6,4.0,4.0
|
|
6
|
+
3,5,0.0,5.9,5.0,5.0
|
|
7
|
+
3,6,1.0,6.7,6.0,6.0
|
|
8
|
+
3,7,2.0,7.7,7.0,7.0
|
|
9
|
+
3,8,3.0,8.7,8.0,8.0
|
|
10
|
+
3,9,0.0,9.9,9.0,9.0
|
|
11
|
+
3,10,1.0,10.2,10.0,10.0
|
|
12
|
+
3,11,2.0,11.2,11.0,11.0
|
|
13
|
+
3,12,3.0,12.2,12.0,12.0
|
|
14
|
+
3,13,1.0,10.2,13.0,13.0
|
|
15
|
+
3,14,2.0,11.2,14.0,14.0
|
|
16
|
+
3,15,3.0,12.2,15.0,15.0
|
teradataml/dataframe/copy_to.py
CHANGED
|
@@ -30,7 +30,7 @@ from teradatasql import OperationalError
|
|
|
30
30
|
from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
|
|
31
31
|
from teradataml.utils.utils import execute_sql
|
|
32
32
|
from teradataml.utils.validators import _Validators
|
|
33
|
-
from
|
|
33
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
@collect_queryband(queryband="CpToSql")
|
|
@@ -27,7 +27,7 @@ from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
|
|
|
27
27
|
from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
|
|
28
28
|
from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
|
|
29
29
|
from teradataml.utils.validators import _Validators
|
|
30
|
-
from
|
|
30
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
@collect_queryband(queryband="fstExprt")
|
|
@@ -1958,7 +1958,8 @@ class _DataTransferUtils():
|
|
|
1958
1958
|
dt_obj = _DataTransferUtils(df)
|
|
1959
1959
|
ins_query = dt_obj._table_exists()
|
|
1960
1960
|
"""
|
|
1961
|
-
return con.dialect.has_table(get_connection(), self.table_name, self.schema_name
|
|
1961
|
+
return con.dialect.has_table(get_connection(), self.table_name, self.schema_name,
|
|
1962
|
+
table_only=True)
|
|
1962
1963
|
|
|
1963
1964
|
def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
|
|
1964
1965
|
"""
|
|
@@ -2144,7 +2145,8 @@ class _DataTransferUtils():
|
|
|
2144
2145
|
# drop the tables created by FastloadCSV.
|
|
2145
2146
|
if not self.save_errors:
|
|
2146
2147
|
for table in fastloadcsv_err_tables:
|
|
2147
|
-
if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name
|
|
2148
|
+
if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name,
|
|
2149
|
+
table_only=True):
|
|
2148
2150
|
UtilFuncs._drop_table(self._get_fully_qualified_table_name(table))
|
|
2149
2151
|
err_warn_dict.update({"fastloadcsv_error_tables": []})
|
|
2150
2152
|
return err_warn_dict
|