teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
|
@@ -31,39 +31,83 @@ def splitter(strr, delim=",", convert_to="str"):
|
|
|
31
31
|
return []
|
|
32
32
|
return [convert_to_type(i, convert_to) for i in strr.split(delim)]
|
|
33
33
|
|
|
34
|
+
def should_convert(t_val, py_type):
|
|
35
|
+
"""
|
|
36
|
+
Function to check type of value and whether value is nan and infinity.
|
|
37
|
+
"""
|
|
38
|
+
return not isinstance(t_val, eval(py_type)) and not math.isinf(t_val) and not math.isnan(t_val)
|
|
39
|
+
|
|
40
|
+
def convert_value(t_val, py_type):
|
|
41
|
+
"""
|
|
42
|
+
Function to convert value to specified python type.
|
|
43
|
+
"""
|
|
44
|
+
return convert_to_type(t_val, py_type) if should_convert(t_val, py_type) else t_val
|
|
45
|
+
|
|
34
46
|
# Process output returned by sklearn function.
|
|
35
|
-
def get_output_data(trans_values, func_name, model_obj, n_c_labels):
|
|
36
|
-
# Converting
|
|
47
|
+
def get_output_data(trans_values, func_name, model_obj, n_c_labels, n_out_columns):
|
|
48
|
+
# Converting sparse matrix to dense array as sparse matrices are NOT
|
|
37
49
|
# supported in Vantage.
|
|
38
50
|
module_name = model_obj.__module__.split("._")[0]
|
|
39
51
|
|
|
40
|
-
|
|
41
|
-
|
|
52
|
+
# Converting the translated values into corresponding the return column's
|
|
53
|
+
# python type.
|
|
54
|
+
if (func_name == "decision_path" or return_columns_python_types is None \
|
|
55
|
+
or not isinstance(trans_values, np.ndarray)):
|
|
56
|
+
trans_values_list = trans_values
|
|
57
|
+
else:
|
|
58
|
+
# Conversion.....
|
|
59
|
+
trans_values_list = []
|
|
60
|
+
for trans_value in trans_values.tolist():
|
|
61
|
+
if not isinstance(trans_value, list):
|
|
62
|
+
trans_value = [trans_value]
|
|
63
|
+
|
|
64
|
+
converted_list = []
|
|
65
|
+
if len(return_columns_python_types) == len(trans_value):
|
|
66
|
+
for t_val, py_type in zip(trans_value, return_columns_python_types):
|
|
67
|
+
converted_list.append(convert_value(t_val, py_type))
|
|
68
|
+
## transform() is having only 1 python return type, But it actually returns more than 1 column
|
|
69
|
+
else:
|
|
70
|
+
for t_val in trans_value:
|
|
71
|
+
converted_list.append(convert_value(t_val, "".join(return_columns_python_types)))
|
|
72
|
+
|
|
73
|
+
trans_values_list.append(converted_list)
|
|
74
|
+
|
|
75
|
+
if type(trans_values_list).__name__ in ["csr_matrix", "csc_matrix"]:
|
|
76
|
+
trans_values_list = trans_values_list.toarray()
|
|
42
77
|
|
|
43
78
|
if module_name == "sklearn.cross_decomposition" and n_c_labels > 0 and func_name == "transform":
|
|
44
79
|
# For cross_decomposition, output is a tuple of arrays when label columns are provided
|
|
45
80
|
# along with feature columns for transform function. In this case, concatenate the
|
|
46
81
|
# arrays and return the combined values.
|
|
47
|
-
if isinstance(
|
|
48
|
-
return np.concatenate(
|
|
82
|
+
if isinstance(trans_values_list, tuple):
|
|
83
|
+
return np.concatenate(trans_values_list, axis=1).tolist()[0]
|
|
49
84
|
|
|
50
|
-
if isinstance(
|
|
51
|
-
or isinstance(
|
|
52
|
-
or isinstance(
|
|
85
|
+
if isinstance(trans_values_list[0], np.ndarray) \
|
|
86
|
+
or isinstance(trans_values_list[0], list) \
|
|
87
|
+
or isinstance(trans_values_list[0], tuple):
|
|
53
88
|
# Here, the value returned by sklearn function is list type.
|
|
54
|
-
opt_list = list(
|
|
89
|
+
opt_list = list(trans_values_list[0])
|
|
90
|
+
|
|
91
|
+
if len(opt_list) < n_out_columns:
|
|
92
|
+
# If the output list is less than the required number of columns, append
|
|
93
|
+
# empty strings to the list.
|
|
94
|
+
opt_list += [""] * (n_out_columns - len(opt_list))
|
|
95
|
+
|
|
55
96
|
if func_name == "inverse_transform" and type(model_obj).__name__ == "MultiLabelBinarizer":
|
|
56
97
|
# output array "trans_values[0]" may not be of same size. It should be of
|
|
57
98
|
# maximum size of `model.classes_`
|
|
58
99
|
# Append None to last elements.
|
|
59
100
|
if len(opt_list) < len(model_obj.classes_):
|
|
60
101
|
opt_list += [""] * (len(model_obj.classes_) - len(opt_list))
|
|
102
|
+
|
|
61
103
|
return opt_list
|
|
62
|
-
|
|
104
|
+
|
|
105
|
+
# Only one element is returned by the function.
|
|
106
|
+
return [trans_values_list[0]]
|
|
63
107
|
|
|
64
108
|
# Arguments to the Script
|
|
65
|
-
if len(sys.argv) !=
|
|
66
|
-
#
|
|
109
|
+
if len(sys.argv) != 10:
|
|
110
|
+
# 10 arguments command line arguments should be passed to this file.
|
|
67
111
|
# 1: file to be run
|
|
68
112
|
# 2. function name (Eg. predict, fit etc)
|
|
69
113
|
# 3. No of feature columns.
|
|
@@ -71,13 +115,17 @@ if len(sys.argv) != 8:
|
|
|
71
115
|
# 5. Comma separated indices of partition columns.
|
|
72
116
|
# 6. Comma separated types of all the data columns.
|
|
73
117
|
# 7. Model file prefix to generated model file using partition columns.
|
|
74
|
-
# 8.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
118
|
+
# 8. Number of columns to be returned by the sklearn's transform function.
|
|
119
|
+
# 9. Flag to check the system type. True, means Lake, Enterprise otherwise.
|
|
120
|
+
# 10. Python types of returned/transfromed columns.
|
|
121
|
+
sys.exit("10 arguments should be passed to this file - file to be run, function name, "\
|
|
122
|
+
"no of feature columns, no of class labels, comma separated indices of partition "\
|
|
123
|
+
"columns, comma separated types of all columns, model file prefix to generate model "\
|
|
124
|
+
"file using partition columns, number of columns to be returnd by sklearn's "\
|
|
125
|
+
"transform function, flag to check lake or enterprise and Python types of "\
|
|
126
|
+
"returned/transfromed columns.")
|
|
79
127
|
|
|
80
|
-
is_lake_system = eval(sys.argv[
|
|
128
|
+
is_lake_system = eval(sys.argv[8])
|
|
81
129
|
if not is_lake_system:
|
|
82
130
|
db = sys.argv[0].split("/")[1]
|
|
83
131
|
func_name = sys.argv[1]
|
|
@@ -86,17 +134,29 @@ n_c_labels = int(sys.argv[3])
|
|
|
86
134
|
data_column_types = splitter(sys.argv[5], delim="--")
|
|
87
135
|
data_partition_column_indices = splitter(sys.argv[4], convert_to="int") # indices are integers.
|
|
88
136
|
model_file_prefix = sys.argv[6]
|
|
137
|
+
# sys.argv[9] will contain a string of python datatypes with '--'
|
|
138
|
+
# separator OR a single datatype OR None in string format.
|
|
139
|
+
ret_col_argv = sys.argv[9]
|
|
140
|
+
if ret_col_argv == "None":
|
|
141
|
+
return_columns_python_types = eval(ret_col_argv)
|
|
142
|
+
else:
|
|
143
|
+
return_columns_python_types = splitter(ret_col_argv, delim="--")
|
|
144
|
+
|
|
145
|
+
no_of_output_columns = int(sys.argv[7])
|
|
89
146
|
|
|
90
147
|
data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
|
|
91
148
|
|
|
92
149
|
model = None
|
|
93
150
|
data_partition_column_values = []
|
|
94
151
|
|
|
152
|
+
all_rows_input = []
|
|
153
|
+
|
|
95
154
|
# Data Format:
|
|
96
155
|
# feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
|
|
97
156
|
# data_partition_columnn.
|
|
98
157
|
# label is optional (it is present when label_exists is not "None")
|
|
99
158
|
|
|
159
|
+
model_name = ""
|
|
100
160
|
while 1:
|
|
101
161
|
try:
|
|
102
162
|
line = input()
|
|
@@ -128,9 +188,48 @@ while 1:
|
|
|
128
188
|
sys.exit("Model file is not installed in Vantage.")
|
|
129
189
|
|
|
130
190
|
f_ = values[:n_f_cols]
|
|
191
|
+
|
|
192
|
+
model_name = model.__class__.__name__
|
|
193
|
+
np_func_list = ["ClassifierChain", "EllipticEnvelope", "MinCovDet",
|
|
194
|
+
"FeatureAgglomeration", "LabelBinarizer", "MultiLabelBinarizer",
|
|
195
|
+
"BernoulliRBM"]
|
|
196
|
+
|
|
197
|
+
# MissingIndicator's transform() and SimpleImputer's inverse_transform() requires processing
|
|
198
|
+
# the entire dataset simultaneously, rather than on a row-by-row basis.
|
|
199
|
+
|
|
200
|
+
# Error getting during row-by-row processing of MissingIndicator -
|
|
201
|
+
# "ValueError: MissingIndicator does not support data with dtype <U13.
|
|
202
|
+
# Please provide either a numeric array (with a floating point or
|
|
203
|
+
# integer dtype) or categorical data represented ei
|
|
204
|
+
|
|
205
|
+
# Error getting during row-by-row processing of SimpleImputer -
|
|
206
|
+
# "IndexError: index 3 is out of bounds for axis 1 with size 3".
|
|
207
|
+
if ((model_name == "MissingIndicator" and func_name == "transform") or \
|
|
208
|
+
(model_name == "SimpleImputer" and func_name == "inverse_transform") or \
|
|
209
|
+
(model_name in ["EllipticEnvelope", "MinCovDet"]
|
|
210
|
+
and func_name == "correct_covariance")):
|
|
211
|
+
all_rows_input.append(f_)
|
|
212
|
+
continue
|
|
213
|
+
|
|
214
|
+
f__ = np.array([f_]) if model_name in np_func_list else [f_]
|
|
215
|
+
|
|
216
|
+
# transform() function in these functions generate different number of output columns and
|
|
217
|
+
# NULLS/NaNs are appended to the end of the output.
|
|
218
|
+
# If we run inverse_transform() on these models, it will take same number of input columns
|
|
219
|
+
# with NULLs/NaNs but those NULLs/NaNs should be ignored while reading the input to
|
|
220
|
+
# inverse_transform() function.
|
|
221
|
+
models_with_all_null_in_last_cols = ["SelectFpr", "SelectFdr", "SelectFwe", "SelectFromModel", "RFECV"]
|
|
222
|
+
if model_name in models_with_all_null_in_last_cols and func_name == "inverse_transform":
|
|
223
|
+
# Remove NULLs/NaNs from the end of one input row.
|
|
224
|
+
_f = np.array([f_])
|
|
225
|
+
_f = _f[~np.isnan(_f)]
|
|
226
|
+
f__ = [_f.tolist()]
|
|
227
|
+
|
|
131
228
|
if n_c_labels > 0:
|
|
132
229
|
# Labels are present in last column.
|
|
133
230
|
l_ = values[n_f_cols:n_f_cols+n_c_labels]
|
|
231
|
+
|
|
232
|
+
l__ = np.array([l_]) if model_name in np_func_list else [l_]
|
|
134
233
|
# predict() now takes 'y' also for it to return the labels from script. Skipping 'y'
|
|
135
234
|
# in function call. Generally, 'y' is passed to return y along with actual output.
|
|
136
235
|
try:
|
|
@@ -138,23 +237,24 @@ while 1:
|
|
|
138
237
|
# used 'in' in if constion, as model.__module__ is giving
|
|
139
238
|
# 'sklearn.cross_decomposition._pls'.
|
|
140
239
|
if "cross_decomposition" in model.__module__:
|
|
141
|
-
trans_values = getattr(model, func_name)(X=
|
|
240
|
+
trans_values = getattr(model, func_name)(X=f__, Y=l__)
|
|
142
241
|
else:
|
|
143
|
-
trans_values = getattr(model, func_name)(X=
|
|
242
|
+
trans_values = getattr(model, func_name)(X=f__, y=l__)
|
|
144
243
|
|
|
145
244
|
except TypeError as ex:
|
|
146
245
|
# Function which does not accept 'y' like predict_proba() raises error like
|
|
147
246
|
# "TypeError: predict_proba() takes 2 positional arguments but 3 were given".
|
|
148
|
-
trans_values = getattr(model, func_name)(
|
|
247
|
+
trans_values = getattr(model, func_name)(f__)
|
|
149
248
|
else:
|
|
150
249
|
# If class labels do not exist in data, don't read labels, read just features.
|
|
151
|
-
trans_values = getattr(model, func_name)(
|
|
250
|
+
trans_values = getattr(model, func_name)(f__)
|
|
152
251
|
|
|
153
252
|
result_list = f_
|
|
154
253
|
if n_c_labels > 0 and func_name in ["predict", "decision_function"]:
|
|
155
254
|
result_list += l_
|
|
156
255
|
result_list += get_output_data(trans_values=trans_values, func_name=func_name,
|
|
157
|
-
model_obj=model, n_c_labels=n_c_labels
|
|
256
|
+
model_obj=model, n_c_labels=n_c_labels,
|
|
257
|
+
n_out_columns=no_of_output_columns)
|
|
158
258
|
|
|
159
259
|
for i, val in enumerate(result_list):
|
|
160
260
|
if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
|
|
@@ -169,3 +269,41 @@ while 1:
|
|
|
169
269
|
|
|
170
270
|
except EOFError: # Exit if reached EOF or CTRL-D
|
|
171
271
|
break
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# MissingIndicator and SimpleImputer needs processing of all the dataset at the same time, instead of row by row.
|
|
275
|
+
# Hence, handling it outside of the while loop
|
|
276
|
+
if model_name == "MissingIndicator" and func_name == "transform" or \
|
|
277
|
+
(model_name == "SimpleImputer" and func_name == "inverse_transform"):
|
|
278
|
+
if model_name == "SimpleImputer":
|
|
279
|
+
all_rows_input = np.array(all_rows_input)
|
|
280
|
+
m_out = getattr(model, func_name)(all_rows_input)
|
|
281
|
+
|
|
282
|
+
if type(m_out).__name__ in ["csr_matrix", "csc_matrix"]:
|
|
283
|
+
m_out = m_out.toarray()
|
|
284
|
+
|
|
285
|
+
for j in range(len(all_rows_input)):
|
|
286
|
+
m_out_list = get_output_data(trans_values=[m_out[j]], func_name=func_name,
|
|
287
|
+
model_obj=model, n_c_labels=n_c_labels,
|
|
288
|
+
n_out_columns=no_of_output_columns)
|
|
289
|
+
|
|
290
|
+
result_list = list(all_rows_input[j]) + list(m_out_list)
|
|
291
|
+
|
|
292
|
+
for i, val in enumerate(result_list):
|
|
293
|
+
if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
|
|
294
|
+
result_list[i] = ""
|
|
295
|
+
# MissingIndicator returns boolean values. Convert them to 0/1.
|
|
296
|
+
elif val == False:
|
|
297
|
+
result_list[i] = 0
|
|
298
|
+
elif val == True:
|
|
299
|
+
result_list[i] = 1
|
|
300
|
+
|
|
301
|
+
print(*(data_partition_column_values + result_list), sep=DELIMITER)
|
|
302
|
+
|
|
303
|
+
## correct_covariance() requires processing of all the input rows at the same time.
|
|
304
|
+
## It returns the output dataset in (n_features, n_features) shape, i.e., based on
|
|
305
|
+
## no. of columns.
|
|
306
|
+
if model_name in ["EllipticEnvelope", "MinCovDet"] and func_name == "correct_covariance":
|
|
307
|
+
result_list = model.correct_covariance(np.array(all_rows_input))
|
|
308
|
+
for l, vals in enumerate(result_list):
|
|
309
|
+
print(*(data_partition_column_values + vals.tolist()), sep=DELIMITER)
|
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
"env_specs": [
|
|
3
3
|
{
|
|
4
4
|
"env_name": "openml_env",
|
|
5
|
-
"libs": "scikit-learn",
|
|
5
|
+
"libs": ["scikit-learn==1.5.1", "joblib==1.4.2", "numpy==1.23.5",
|
|
6
|
+
"scipy==1.14.0", "threadpoolctl==3.5.0", "lightgbm==3.3.3",
|
|
7
|
+
"pandas==2.2.3"],
|
|
6
8
|
"desc": "DONT DELETE: OpenML environment"
|
|
7
9
|
}
|
|
8
10
|
]
|
|
@@ -1348,6 +1348,25 @@
|
|
|
1348
1348
|
"radio":"FLOAT",
|
|
1349
1349
|
"newspaper":"FLOAT",
|
|
1350
1350
|
"sales":"FLOAT"
|
|
1351
|
+
},
|
|
1352
|
+
"timestamp_data":{
|
|
1353
|
+
"id": "INTEGER",
|
|
1354
|
+
"timestamp_col": "VARCHAR(50)",
|
|
1355
|
+
"timestamp_col1": "BIGINT",
|
|
1356
|
+
"format_col": "VARCHAR(50)",
|
|
1357
|
+
"timezone_col": "VARCHAR(50)"
|
|
1358
|
+
},
|
|
1359
|
+
"interval_data":{
|
|
1360
|
+
"id": "INTEGER",
|
|
1361
|
+
"int_col": "BIGINT",
|
|
1362
|
+
"value_col": "VARCHAR(30)",
|
|
1363
|
+
"value_col1": "VARCHAR(30)",
|
|
1364
|
+
"str_col1": "VARCHAR(30)",
|
|
1365
|
+
"str_col2": "VARCHAR(30)"
|
|
1366
|
+
},
|
|
1367
|
+
"url_data": {
|
|
1368
|
+
"id": "INTEGER",
|
|
1369
|
+
"urls": "VARCHAR(60)",
|
|
1370
|
+
"part": "VARCHAR(20)"
|
|
1351
1371
|
}
|
|
1352
|
-
|
|
1353
1372
|
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
id,timestamp_col,timestamp_col1,format_col,timezone_col
|
|
2
|
+
0,"2015-01-08 00:00:12.2",123456,"YYYY-MM-DD HH24:MI:SS.FF6","GMT"
|
|
3
|
+
1,"2015-01-08 13:00",878986,"YYYY-MM-DD HH24:MI","America Pacific"
|
|
4
|
+
2,"2015-01-08 00:00:12.2+10:00",45678910234,"YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM","GMT+10"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
passenger,AttributeName,AttributeValue,survived
|
|
2
|
+
61,pclass,3,0
|
|
3
|
+
1000,pclass,3,1
|
|
4
|
+
40,pclass,3,1
|
|
5
|
+
21,pclass,2,0
|
|
6
|
+
61,gender,male,0
|
|
7
|
+
1000,gender,,1
|
|
8
|
+
40,gender,female,1
|
|
9
|
+
21,gender,male,0
|
|
10
|
+
2,pclass,1,1
|
|
11
|
+
16,pclass,2,1
|
|
12
|
+
7,pclass,1,0
|
|
13
|
+
2,gender,female,1
|
|
14
|
+
16,gender,female,1
|
|
15
|
+
7,gender,male,0
|
|
16
|
+
10,pclass,2,1
|
|
17
|
+
4,pclass,1,1
|
|
18
|
+
10,gender,female,1
|
|
19
|
+
4,gender,female,1
|
teradataml/data/uaf_example.json
CHANGED
|
@@ -471,5 +471,59 @@
|
|
|
471
471
|
"CONF_OFF_v": "FLOAT",
|
|
472
472
|
"CONF_LOW_v": "FLOAT",
|
|
473
473
|
"CONF_HI_v": "FLOAT"
|
|
474
|
+
},
|
|
475
|
+
"dwt_dataTable":{
|
|
476
|
+
"id": "INTEGER",
|
|
477
|
+
"rowi": "INTEGER",
|
|
478
|
+
"v": "FLOAT"
|
|
479
|
+
},
|
|
480
|
+
"dwt_filterTable":{
|
|
481
|
+
"id": "INTEGER",
|
|
482
|
+
"seq": "INTEGER",
|
|
483
|
+
"lo": "FLOAT",
|
|
484
|
+
"hi": "FLOAT"
|
|
485
|
+
},
|
|
486
|
+
"idwt_dataTable":{
|
|
487
|
+
"id": "INTEGER",
|
|
488
|
+
"rowi": "INTEGER",
|
|
489
|
+
"approx": "FLOAT",
|
|
490
|
+
"detail": "FLOAT"
|
|
491
|
+
},
|
|
492
|
+
"idwt_filterTable":{
|
|
493
|
+
"id": "INTEGER",
|
|
494
|
+
"seq": "INTEGER",
|
|
495
|
+
"lo": "FLOAT",
|
|
496
|
+
"hi": "FLOAT"
|
|
497
|
+
},
|
|
498
|
+
"dwt2d_dataTable":{
|
|
499
|
+
"id": "INTEGER",
|
|
500
|
+
"x": "INTEGER",
|
|
501
|
+
"y": "INTEGER",
|
|
502
|
+
"v": "FLOAT"
|
|
503
|
+
},
|
|
504
|
+
"idwt2d_dataTable":{
|
|
505
|
+
"id": "INTEGER",
|
|
506
|
+
"x": "INTEGER",
|
|
507
|
+
"y": "INTEGER",
|
|
508
|
+
"v": "FLOAT"
|
|
509
|
+
},
|
|
510
|
+
"covid_confirm_sd":{
|
|
511
|
+
"city": "VARCHAR(15)",
|
|
512
|
+
"row_axis": "INTEGER",
|
|
513
|
+
"cnumber": "INTEGER"
|
|
514
|
+
},
|
|
515
|
+
"real_values":{
|
|
516
|
+
"TD_TIMECODE": "TIMESTAMP(0)",
|
|
517
|
+
"id": "INTEGER",
|
|
518
|
+
"val": "FLOAT",
|
|
519
|
+
"<PTI_CLAUSE>": "(TIMESTAMP(0), DATE '2020-01-01', HOURS(1), COLUMNS(id), nonsequenced)"
|
|
520
|
+
},
|
|
521
|
+
"windowdfft":{
|
|
522
|
+
"id": "INTEGER",
|
|
523
|
+
"row_i": "INTEGER",
|
|
524
|
+
"v1": "FLOAT",
|
|
525
|
+
"v2": "FLOAT",
|
|
526
|
+
"v3": "FLOAT",
|
|
527
|
+
"v4": "FLOAT"
|
|
474
528
|
}
|
|
475
|
-
}
|
|
529
|
+
}
|
|
@@ -6,5 +6,20 @@
|
|
|
6
6
|
"temp": "integer",
|
|
7
7
|
"pressure": "real",
|
|
8
8
|
"dewpoint": "varchar(30)"
|
|
9
|
+
},
|
|
10
|
+
"titanic_dataset_unpivoted":{
|
|
11
|
+
"passenger": "integer",
|
|
12
|
+
"AttributeName": "varchar(30)",
|
|
13
|
+
"AttributeValue": "varchar(30)",
|
|
14
|
+
"survived": "integer"
|
|
15
|
+
},
|
|
16
|
+
"star_pivot":{
|
|
17
|
+
"country": "varchar(30)",
|
|
18
|
+
"state": "varchar(30)",
|
|
19
|
+
"yr": "integer",
|
|
20
|
+
"qtr": "varchar(30)",
|
|
21
|
+
"sales": "integer",
|
|
22
|
+
"cogs": "integer",
|
|
23
|
+
"rating": "varchar(30)"
|
|
9
24
|
}
|
|
10
25
|
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"id","urls","part"
|
|
2
|
+
0,"http://example.com:8080/path","FILE"
|
|
3
|
+
1,"ftp://example.net:21/path","PATH"
|
|
4
|
+
2,"https://example.net/path4/path5/path6?query4=value4#fragment3","REF"
|
|
5
|
+
3,"https://www.facebook.com","HOST"
|
|
6
|
+
4,"https://teracloud-pod-services-pod-account-service.dummyvalue.production.pods.teracloud.ninja/v1/accounts/acc-dummyvalue/user-environment-service/api/v1/","QUERY"
|
|
7
|
+
5,"http://pg.example.ml/path150#fragment90","AUTHORITY"
|
|
8
|
+
6,"smtp://user:password@smtp.example.com:21/file.txt","USERINFO"
|
|
9
|
+
7,"https://www.google.com","PROTOCOL"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
id,row_i,v1,v2,v3,v4
|
|
2
|
+
3,1,0.0,1.4,1.0,1.0
|
|
3
|
+
3,2,1.0,2.4,2.0,2.0
|
|
4
|
+
3,3,2.0,3.4,3.0,3.0
|
|
5
|
+
3,4,3.0,4.6,4.0,4.0
|
|
6
|
+
3,5,0.0,5.9,5.0,5.0
|
|
7
|
+
3,6,1.0,6.7,6.0,6.0
|
|
8
|
+
3,7,2.0,7.7,7.0,7.0
|
|
9
|
+
3,8,3.0,8.7,8.0,8.0
|
|
10
|
+
3,9,0.0,9.9,9.0,9.0
|
|
11
|
+
3,10,1.0,10.2,10.0,10.0
|
|
12
|
+
3,11,2.0,11.2,11.0,11.0
|
|
13
|
+
3,12,3.0,12.2,12.0,12.0
|
|
14
|
+
3,13,1.0,10.2,13.0,13.0
|
|
15
|
+
3,14,2.0,11.2,14.0,14.0
|
|
16
|
+
3,15,3.0,12.2,15.0,15.0
|
teradataml/dataframe/copy_to.py
CHANGED
|
@@ -30,7 +30,7 @@ from teradatasql import OperationalError
|
|
|
30
30
|
from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
|
|
31
31
|
from teradataml.utils.utils import execute_sql
|
|
32
32
|
from teradataml.utils.validators import _Validators
|
|
33
|
-
from
|
|
33
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
@collect_queryband(queryband="CpToSql")
|
|
@@ -27,7 +27,7 @@ from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
|
|
|
27
27
|
from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
|
|
28
28
|
from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
|
|
29
29
|
from teradataml.utils.validators import _Validators
|
|
30
|
-
from
|
|
30
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
@collect_queryband(queryband="fstExprt")
|
|
@@ -1958,7 +1958,8 @@ class _DataTransferUtils():
|
|
|
1958
1958
|
dt_obj = _DataTransferUtils(df)
|
|
1959
1959
|
ins_query = dt_obj._table_exists()
|
|
1960
1960
|
"""
|
|
1961
|
-
return con.dialect.has_table(get_connection(), self.table_name, self.schema_name
|
|
1961
|
+
return con.dialect.has_table(get_connection(), self.table_name, self.schema_name,
|
|
1962
|
+
table_only=True)
|
|
1962
1963
|
|
|
1963
1964
|
def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
|
|
1964
1965
|
"""
|
|
@@ -2144,7 +2145,8 @@ class _DataTransferUtils():
|
|
|
2144
2145
|
# drop the tables created by FastloadCSV.
|
|
2145
2146
|
if not self.save_errors:
|
|
2146
2147
|
for table in fastloadcsv_err_tables:
|
|
2147
|
-
if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name
|
|
2148
|
+
if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name,
|
|
2149
|
+
table_only=True):
|
|
2148
2150
|
UtilFuncs._drop_table(self._get_fully_qualified_table_name(table))
|
|
2149
2151
|
err_warn_dict.update({"fastloadcsv_error_tables": []})
|
|
2150
2152
|
return err_warn_dict
|