teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +71 -0
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +51 -24
- teradataml/analytics/json_parser/utils.py +11 -17
- teradataml/automl/__init__.py +103 -48
- teradataml/automl/data_preparation.py +55 -37
- teradataml/automl/data_transformation.py +131 -69
- teradataml/automl/feature_engineering.py +117 -185
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +13 -25
- teradataml/automl/model_training.py +214 -75
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +11 -6
- teradataml/common/garbagecollector.py +5 -0
- teradataml/common/messagecodes.py +3 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/utils.py +6 -0
- teradataml/context/context.py +49 -29
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/glm_example.json +28 -1
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +20 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
- teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
- teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
- teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
- teradataml/data/teradataml_example.json +77 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +120 -61
- teradataml/dataframe/dataframe.py +102 -17
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +272 -89
- teradataml/dataframe/sql.py +84 -0
- teradataml/dbutils/dbutils.py +2 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
- teradataml/options/__init__.py +13 -4
- teradataml/options/configure.py +27 -6
- teradataml/scriptmgmt/UserEnv.py +19 -16
- teradataml/scriptmgmt/lls_utils.py +117 -14
- teradataml/table_operators/Script.py +2 -3
- teradataml/table_operators/TableOperator.py +58 -10
- teradataml/utils/validators.py +40 -2
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
|
@@ -4,41 +4,24 @@ import os
|
|
|
4
4
|
import sys
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
|
-
# The below import is needed to convert sparse matrix to dense array as sparse matrices are NOT
|
|
8
|
-
# supported in Vantage.
|
|
9
|
-
# This is in scipy 1.10.0. Might vary based on scipy version.
|
|
10
|
-
from scipy.sparse import csr_matrix
|
|
11
|
-
|
|
12
7
|
DELIMITER = '\t'
|
|
13
8
|
|
|
14
|
-
def get_value(value):
|
|
15
|
-
ret_val = value
|
|
16
|
-
try:
|
|
17
|
-
ret_val = float(value.replace(' ', ''))
|
|
18
|
-
except Exception as ex:
|
|
19
|
-
# If the value can't be converted to float, then it is string.
|
|
20
|
-
pass
|
|
21
|
-
return ret_val
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def get_values_list(values, ignore_none=True):
|
|
9
|
+
def get_values_list(values, types):
|
|
25
10
|
ret_vals = []
|
|
26
|
-
for val in values:
|
|
27
|
-
|
|
28
|
-
# Empty cell value in the database table.
|
|
29
|
-
continue
|
|
30
|
-
ret_vals.append(get_value(val))
|
|
31
|
-
|
|
11
|
+
for i, val in enumerate(values):
|
|
12
|
+
ret_vals.append(convert_to_type(val, types[i]))
|
|
32
13
|
return ret_vals
|
|
33
14
|
|
|
34
15
|
def convert_to_type(val, typee):
|
|
35
16
|
if typee == 'int':
|
|
36
|
-
return int(val)
|
|
17
|
+
return int(val) if val != "" else np.nan
|
|
37
18
|
if typee == 'float':
|
|
38
|
-
|
|
19
|
+
if isinstance(val, str):
|
|
20
|
+
val = val.replace(' ', '')
|
|
21
|
+
return float(val) if val != "" else np.nan
|
|
39
22
|
if typee == 'bool':
|
|
40
|
-
return eval(val)
|
|
41
|
-
return str(val)
|
|
23
|
+
return eval(val) if val != "" else None
|
|
24
|
+
return str(val) if val != "" else None
|
|
42
25
|
|
|
43
26
|
def splitter(strr, delim=",", convert_to="str"):
|
|
44
27
|
"""
|
|
@@ -54,7 +37,7 @@ def get_output_data(trans_values, func_name, model_obj, n_c_labels):
|
|
|
54
37
|
# supported in Vantage.
|
|
55
38
|
module_name = model_obj.__module__.split("._")[0]
|
|
56
39
|
|
|
57
|
-
if
|
|
40
|
+
if type(trans_values).__name__ in ["csr_matrix", "csc_matrix"]:
|
|
58
41
|
trans_values = trans_values.toarray()
|
|
59
42
|
|
|
60
43
|
if module_name == "sklearn.cross_decomposition" and n_c_labels > 0 and func_name == "transform":
|
|
@@ -86,13 +69,13 @@ if len(sys.argv) != 8:
|
|
|
86
69
|
# 3. No of feature columns.
|
|
87
70
|
# 4. No of class labels.
|
|
88
71
|
# 5. Comma separated indices of partition columns.
|
|
89
|
-
# 6. Comma separated types of the
|
|
72
|
+
# 6. Comma separated types of all the data columns.
|
|
90
73
|
# 7. Model file prefix to generated model file using partition columns.
|
|
91
74
|
# 8. Flag to check the system type. True, means Lake, Enterprise otherwise.
|
|
92
75
|
sys.exit("8 arguments should be passed to this file - file to be run, function name, "\
|
|
93
|
-
"no of feature columns, no of class labels, comma separated indices
|
|
94
|
-
"
|
|
95
|
-
"columns and flag to check lake or enterprise.")
|
|
76
|
+
"no of feature columns, no of class labels, comma separated indices of partition "\
|
|
77
|
+
"columns, comma separated types of all columns, model file prefix to generate model "\
|
|
78
|
+
"file using partition columns and flag to check lake or enterprise.")
|
|
96
79
|
|
|
97
80
|
is_lake_system = eval(sys.argv[7])
|
|
98
81
|
if not is_lake_system:
|
|
@@ -100,10 +83,12 @@ if not is_lake_system:
|
|
|
100
83
|
func_name = sys.argv[1]
|
|
101
84
|
n_f_cols = int(sys.argv[2])
|
|
102
85
|
n_c_labels = int(sys.argv[3])
|
|
103
|
-
|
|
86
|
+
data_column_types = splitter(sys.argv[5], delim="--")
|
|
104
87
|
data_partition_column_indices = splitter(sys.argv[4], convert_to="int") # indices are integers.
|
|
105
88
|
model_file_prefix = sys.argv[6]
|
|
106
89
|
|
|
90
|
+
data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
|
|
91
|
+
|
|
107
92
|
model = None
|
|
108
93
|
data_partition_column_values = []
|
|
109
94
|
|
|
@@ -119,6 +104,7 @@ while 1:
|
|
|
119
104
|
break
|
|
120
105
|
else:
|
|
121
106
|
values = line.split(DELIMITER)
|
|
107
|
+
values = get_values_list(values, data_column_types)
|
|
122
108
|
if not data_partition_column_values:
|
|
123
109
|
# Partition column values is same for all rows. Hence, only read once.
|
|
124
110
|
for i, val in enumerate(data_partition_column_indices):
|
|
@@ -141,10 +127,10 @@ while 1:
|
|
|
141
127
|
if not model:
|
|
142
128
|
sys.exit("Model file is not installed in Vantage.")
|
|
143
129
|
|
|
144
|
-
f_ =
|
|
130
|
+
f_ = values[:n_f_cols]
|
|
145
131
|
if n_c_labels > 0:
|
|
146
132
|
# Labels are present in last column.
|
|
147
|
-
l_ =
|
|
133
|
+
l_ = values[n_f_cols:n_f_cols+n_c_labels]
|
|
148
134
|
# predict() now takes 'y' also for it to return the labels from script. Skipping 'y'
|
|
149
135
|
# in function call. Generally, 'y' is passed to return y along with actual output.
|
|
150
136
|
try:
|
|
@@ -170,10 +156,16 @@ while 1:
|
|
|
170
156
|
result_list += get_output_data(trans_values=trans_values, func_name=func_name,
|
|
171
157
|
model_obj=model, n_c_labels=n_c_labels)
|
|
172
158
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
159
|
+
for i, val in enumerate(result_list):
|
|
160
|
+
if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
|
|
161
|
+
result_list[i] = ""
|
|
162
|
+
# MissingIndicator returns boolean values. Convert them to 0/1.
|
|
163
|
+
elif val == False:
|
|
164
|
+
result_list[i] = 0
|
|
165
|
+
elif val == True:
|
|
166
|
+
result_list[i] = 1
|
|
167
|
+
|
|
168
|
+
print(*(data_partition_column_values + result_list), sep=DELIMITER)
|
|
177
169
|
|
|
178
170
|
except EOFError: # Exit if reached EOF or CTRL-D
|
|
179
171
|
break
|
|
@@ -1271,6 +1271,83 @@
|
|
|
1271
1271
|
"height":"INTEGER",
|
|
1272
1272
|
"weight":"INTEGER",
|
|
1273
1273
|
"bmi":"INTEGER"
|
|
1274
|
+
},
|
|
1275
|
+
"breast_cancer":{
|
|
1276
|
+
"id":"BIGINT",
|
|
1277
|
+
"diagnosis":"VARCHAR(20)",
|
|
1278
|
+
"radius_mean":"FLOAT",
|
|
1279
|
+
"texture_mean":"FLOAT",
|
|
1280
|
+
"perimeter_mean":"FLOAT",
|
|
1281
|
+
"area_mean":"FLOAT",
|
|
1282
|
+
"smoothness_mean":"FLOAT",
|
|
1283
|
+
"compactness_mean":"FLOAT",
|
|
1284
|
+
"concavity_mean":"FLOAT",
|
|
1285
|
+
"concave_points_mean":"FLOAT",
|
|
1286
|
+
"symmetry_mean":"FLOAT",
|
|
1287
|
+
"fractal_dimension_mean":"FLOAT",
|
|
1288
|
+
"radius_se":"FLOAT",
|
|
1289
|
+
"texture_se":"FLOAT",
|
|
1290
|
+
"perimeter_se":"FLOAT",
|
|
1291
|
+
"area_se":"FLOAT",
|
|
1292
|
+
"smoothness_se":"FLOAT",
|
|
1293
|
+
"compactness_se":"FLOAT",
|
|
1294
|
+
"concavity_se":"FLOAT",
|
|
1295
|
+
"concave_points_se":"FLOAT",
|
|
1296
|
+
"symmetry_se":"FLOAT",
|
|
1297
|
+
"fractal_dimension_se":"FLOAT",
|
|
1298
|
+
"radius_worst":"FLOAT",
|
|
1299
|
+
"texture_worst":"FLOAT",
|
|
1300
|
+
"perimeter_worst":"FLOAT",
|
|
1301
|
+
"area_worst":"FLOAT",
|
|
1302
|
+
"smoothness_worst":"FLOAT",
|
|
1303
|
+
"compactness_worst":"FLOAT",
|
|
1304
|
+
"concavity_worst":"FLOAT",
|
|
1305
|
+
"concave_points_worst":"FLOAT",
|
|
1306
|
+
"symmetry_worst":"FLOAT",
|
|
1307
|
+
"fractal_dimension_worst":"FLOAT"
|
|
1308
|
+
},
|
|
1309
|
+
"bike_sharing" :{
|
|
1310
|
+
"instant":"BIGINT",
|
|
1311
|
+
"dteday":"DATE FORMAT 'dd-mm-yyyy'",
|
|
1312
|
+
"season":"BIGINT",
|
|
1313
|
+
"yr":"BIGINT",
|
|
1314
|
+
"mnth":"BIGINT",
|
|
1315
|
+
"holiday":"BIGINT",
|
|
1316
|
+
"weekday":"BIGINT",
|
|
1317
|
+
"workingday":"BIGINT",
|
|
1318
|
+
"weathersit":"BIGINT",
|
|
1319
|
+
"temp":"FLOAT",
|
|
1320
|
+
"atemp":"FLOAT",
|
|
1321
|
+
"hum":"FLOAT",
|
|
1322
|
+
"windspeed":"FLOAT",
|
|
1323
|
+
"casual":"BIGINT",
|
|
1324
|
+
"registered":"BIGINT",
|
|
1325
|
+
"cnt":"BIGINT"
|
|
1326
|
+
},
|
|
1327
|
+
"bank_marketing":{
|
|
1328
|
+
"age":"BIGINT",
|
|
1329
|
+
"job":"VARCHAR(20)",
|
|
1330
|
+
"marital":"VARCHAR(20)",
|
|
1331
|
+
"education":"VARCHAR(20)",
|
|
1332
|
+
"default_value":"VARCHAR(20)",
|
|
1333
|
+
"balance":"BIGINT",
|
|
1334
|
+
"housing":"VARCHAR(20)",
|
|
1335
|
+
"loan":"VARCHAR(20)",
|
|
1336
|
+
"contact":"VARCHAR(20)",
|
|
1337
|
+
"day_of_month":"BIGINT",
|
|
1338
|
+
"month_of_year":"VARCHAR(20)",
|
|
1339
|
+
"duration":"BIGINT",
|
|
1340
|
+
"campaign":"BIGINT",
|
|
1341
|
+
"pdays":"BIGINT",
|
|
1342
|
+
"previous":"BIGINT",
|
|
1343
|
+
"poutcome":"VARCHAR(20)",
|
|
1344
|
+
"deposit":"VARCHAR(20)"
|
|
1345
|
+
},
|
|
1346
|
+
"advertising":{
|
|
1347
|
+
"TV":"FLOAT",
|
|
1348
|
+
"radio":"FLOAT",
|
|
1349
|
+
"newspaper":"FLOAT",
|
|
1350
|
+
"sales":"FLOAT"
|
|
1274
1351
|
}
|
|
1275
1352
|
|
|
1276
1353
|
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"roc_input" : {
|
|
3
|
+
"model_id" : "integer",
|
|
4
|
+
"id" : "integer",
|
|
5
|
+
"observation" : "integer",
|
|
6
|
+
"probability" : "real"
|
|
7
|
+
},
|
|
8
|
+
"boston2cols":{
|
|
9
|
+
"groupName":"VARCHAR(40)",
|
|
10
|
+
"groupValue":"REAL"
|
|
11
|
+
},
|
|
12
|
+
"insect2Cols":{
|
|
13
|
+
"groupName":"VARCHAR(40)",
|
|
14
|
+
"groupValue":"INTEGER"
|
|
15
|
+
}
|
|
16
|
+
}
|
teradataml/dataframe/copy_to.py
CHANGED
|
@@ -569,7 +569,6 @@ def copy_to_sql(df, table_name,
|
|
|
569
569
|
chunksize, is_pti, timecode_column,
|
|
570
570
|
sequence_column, match_column_order)
|
|
571
571
|
|
|
572
|
-
|
|
573
572
|
# df is a teradataml DataFrame object (to_sql wrapper used)
|
|
574
573
|
elif isinstance(df, tdmldf.DataFrame):
|
|
575
574
|
df_column_list = [col.name for col in df._metaexpr.c]
|
|
@@ -578,7 +577,13 @@ def copy_to_sql(df, table_name,
|
|
|
578
577
|
# Reorder the column list to reposition the timecode and sequence columns
|
|
579
578
|
df_column_list = _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_column)
|
|
580
579
|
|
|
581
|
-
|
|
580
|
+
source_tbl_name = UtilFuncs._extract_table_name(df._table_name)
|
|
581
|
+
from_schema_name = UtilFuncs._extract_db_name(df._table_name)
|
|
582
|
+
|
|
583
|
+
df_utils._insert_all_from_table(table_name, source_tbl_name, df_column_list,
|
|
584
|
+
to_schema_name=schema_name,
|
|
585
|
+
from_schema_name=from_schema_name,
|
|
586
|
+
temporary=temporary)
|
|
582
587
|
|
|
583
588
|
# While table name conflict is present, Delete the source table after creation of temporary table.
|
|
584
589
|
# Rename the temporary table to destination table name.
|
|
@@ -1656,7 +1661,7 @@ def _get_all_sqlalchemy_mappings():
|
|
|
1656
1661
|
EXAMPLES:
|
|
1657
1662
|
_get_all_sqlalchemy_mappings()
|
|
1658
1663
|
"""
|
|
1659
|
-
teradata_types_map = {'int32':INTEGER(), 'int64':BIGINT(),
|
|
1664
|
+
teradata_types_map = {'int32':INTEGER(), 'int64':BIGINT(), "Int64": INTEGER(),
|
|
1660
1665
|
'object':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
|
|
1661
1666
|
'O':VARCHAR(configure.default_varchar_size,charset='UNICODE'),
|
|
1662
1667
|
'float64':FLOAT(), 'float32':FLOAT(), 'bool':BYTEINT(),
|