teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +315 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +95 -8
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +5 -1
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +59 -35
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +27 -12
- teradataml/automl/model_training.py +73 -46
- teradataml/common/constants.py +88 -29
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +19 -3
- teradataml/common/messages.py +6 -1
- teradataml/common/sqlbundle.py +64 -12
- teradataml/common/utils.py +246 -47
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +161 -27
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +1049 -285
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +578 -35
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +185 -16
- teradataml/dbutils/dbutils.py +1049 -115
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/_base.py +1466 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
- teradataml/options/__init__.py +54 -38
- teradataml/options/configure.py +131 -27
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +5 -5
- teradataml/scriptmgmt/lls_utils.py +130 -40
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2318 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +51 -2
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +99 -8
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_class.py +0 -255
- teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
teradataml/table_operators/Apply.py CHANGED

@@ -316,14 +316,6 @@ class Apply(TableOperator):
                          is_local_order,
                          sort_ascending,
                          nulls_first)
-
-        # Set the variable specific to this child class.
-        self.apply_command = apply_command
-        self.env_name = env_name if env_name is not None else get_user_env()
-        self.style = style
-        self.returns = returns
-        self._skip_argument_validation = False
-
         # Create AnalyticsWrapperUtils instance which contains validation functions.
         # This is required for is_default_or_not check.
         # Rest all validation is done using _Validators
@@ -332,20 +324,42 @@ class Apply(TableOperator):
         # Perform argument validation for arguments specific to this class.
         self.__arg_info_matrix = []
 
-        self.__arg_info_matrix.append(["style",
-        self.__arg_info_matrix.append(["env_name",
-        self.__arg_info_matrix.append(["apply_command",
-        self.__arg_info_matrix.append(["returns",
-
+        self.__arg_info_matrix.append(["style", style, True, (str), True, ['CSV']])
+        self.__arg_info_matrix.append(["env_name", env_name, False, (str, UserEnv), True])
+        self.__arg_info_matrix.append(["apply_command", apply_command, False, (str), True])
+        self.__arg_info_matrix.append(["returns", returns, True, (dict), True])
+        self._skip_argument_validation = False
         # Perform the function argument validations.
         self.__apply__validate()
 
-
+        # If user do not pass environment, get the default environment.
+        if env_name is None:
+            env_name = get_user_env()
+        self._open_af_env = env_name
+
+        # Set the variable specific to this child class.
+        self.apply_command = apply_command
+        self.env_name = env_name if isinstance(env_name, str) else env_name.env_name
+        self.style = style
+        self.returns = returns
+
+
+    @property
+    def env(self):
+        """
+        DESCRIPTION:
+            Getter to get environment.
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+        """
+        if isinstance(self._open_af_env, str):
+            self._open_af_env = get_env(self._open_af_env)
 
-
-        # remote user environment name as string.
-        if isinstance(self.env_name, UserEnv):
-            self.env_name = self.env_name.env_name
+        return self._open_af_env
 
     @property
     def skip_argument_validation(self):
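Note: the relocated assignments above defer environment resolution. The constructor now stores the environment name (defaulting to get_user_env()) and the new env property converts a stored string to the environment object via get_env only on first access. A minimal sketch of that lazy-resolution pattern; the resolver parameter stands in for teradataml's get_env and is an assumption for illustration:

    class _LazyEnvHolder:
        """Sketch of the lazy-resolution pattern used by Apply.env above."""

        def __init__(self, env, resolver):
            # "env" may be an already-resolved object or just its string name.
            self._open_af_env = env
            self._resolver = resolver  # stands in for teradataml's get_env

        @property
        def env(self):
            # Resolve a string name to the real object only on first access,
            # then cache the result for later accesses.
            if isinstance(self._open_af_env, str):
                self._open_af_env = self._resolver(self._open_af_env)
            return self._open_af_env

    holder = _LazyEnvHolder("demo_env", resolver=lambda name: {"name": name})
    print(holder.env)  # the resolver runs here, on first access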
teradataml/table_operators/Script.py CHANGED

@@ -1701,7 +1701,9 @@ class Script(TableOperator):
                                                    gc_on_quit=True, quote=False,
                                                    table_type=table_type)
         try:
-            if
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
teradataml/table_operators/TableOperator.py CHANGED

@@ -458,7 +458,9 @@ class TableOperator:
                                                    )
 
         try:
-            if
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
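Note: both Script and TableOperator now check configure.temp_object_type before deciding between a table and a view for the stdout result, so temporary output can be kept in volatile tables. A hedged example of opting in; the value "VT" is an assumption inferred from TeradataConstants.TERADATA_VOLATILE_TABLE in this diff, not confirmed documentation:

    from teradataml import configure

    # Assumption: "VT" is the setting that makes the comparison against
    # TeradataConstants.TERADATA_VOLATILE_TABLE in the branch above succeed.
    configure.temp_object_type = "VT"

    # From here on, Script/Apply stdout results are materialized with
    # UtilFuncs._create_table(..., volatile=True) instead of a view or a
    # permanent table.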
teradataml/table_operators/query_generator.py CHANGED

@@ -481,6 +481,9 @@ class QueryGenerator:
             return configure.read_nos_function_mapping.upper()
         elif "WriteNOS".lower() == function_name.lower():
             return configure.write_nos_function_mapping.upper()
+        # If Table Operator function is IMAGE2MATRIX, then return alias name as TD_IMAGE2MATRIX.
+        elif "IMAGE2MATRIX".lower() == function_name.lower():
+            return "TD_IMAGE2MATRIX"
 
         engine_name = UtilFuncs._get_engine_name(self._engine)
 
teradataml/table_operators/table_operator_query_generator.py CHANGED

@@ -231,7 +231,9 @@ class TableOperatorQueryGenerator(QueryGenerator):
         using_clause = ""
         # If the function is a NOS function, then USING clause is needed.
         if self._function_name.lower() in [configure.write_nos_function_mapping.lower(),
-                                           configure.read_nos_function_mapping.lower()
+                                           configure.read_nos_function_mapping.lower(),
+                                           "td_image2matrix"
+                                           ]:
             using_clause = "USING"
         invocation_sql = "{0}\n\t{1}{2}".format(invocation_sql, using_clause, self.__OTHER_ARG_CLAUSE)
 
teradataml/table_operators/table_operator_util.py CHANGED

@@ -24,6 +24,7 @@ from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
 from functools import partial
 from inspect import isfunction, getsource
+from pathlib import Path
 
 
 class _TableOperatorUtils:
@@ -281,7 +282,8 @@ class _TableOperatorUtils:
         """
         # Validate the user defined function.
 
-        if self.operation
+        if self.operation in [TableOperatorConstants.UDF_OP.value,\
+                              TableOperatorConstants.REGISTER_OP.value]:
             for udf_function in self.user_function:
                 if not isfunction(udf_function):
                     raise TypeError(Messages.get_message(
@@ -330,20 +332,30 @@ class _TableOperatorUtils:
         EXAMPLES:
             self.__create_user_script()
         """
-        #
-        #
-        # It has the format "<
-
-
-
-
-
-
-
-
-
-
+        # If operation is register, then generate script name based on the
+        # user function name and return type.
+        # It has the format "tdml_udf_name_<registered_name>_udf_type_<return_type>_register.py"
+        if self.operation == TableOperatorConstants.REGISTER_OP.value:
+            registered_name = list(self.returns.keys())[0]
+            return_type = self.returns[registered_name]
+            self.script_name = "tdml_udf_name_{}_udf_type_{}_register.py".format(registered_name, return_type)
+            self.script_base_name = Path(self.script_name).stem
+        else:
+            # Generate script name and alias, and add entry to a Garbage Collector.
+            # script_entry is the string that is added to Garbage collector.
+            # It has the format "<databasename>"."<file_id>".
+            self.script_entry, self.script_alias, self.script_name, self.script_base_name = self.__get_script_name()
+
+        if self.operation not in [TableOperatorConstants.UDF_OP.value, TableOperatorConstants.REGISTER_OP.value]:
+            # Get the converters to use with pandas.read_csv, and to correctly
+            # typecast the numeric data.
+            python_input_col_types = [UtilFuncs._teradata_type_to_python_type(col.type)
+                                      for col in self.data._metaexpr.c]
+            input_converters = UtilFuncs._get_pandas_converters(python_input_col_types)
+
+            python_output_col_types = [UtilFuncs._teradata_type_to_python_type(type_)
+                                       for type_ in list(self.returns.values())]
+            output_converters = UtilFuncs._get_pandas_converters(python_output_col_types)
 
         # Create script in .teradataml directory.
         script_dir = GarbageCollector._get_temp_dir_name()
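Note: for the new register operation, the script name is derived from the registered name and return type instead of __get_script_name(). A quick, self-contained check of the naming scheme; the returns dict here is hypothetical (in teradataml its values are teradatasqlalchemy type objects, not strings):

    from pathlib import Path

    returns = {"to_upper": "VARCHAR"}  # hypothetical registered UDF and return type
    registered_name = list(returns.keys())[0]
    return_type = returns[registered_name]

    script_name = "tdml_udf_name_{}_udf_type_{}_register.py".format(registered_name, return_type)
    print(script_name)             # tdml_udf_name_to_upper_udf_type_VARCHAR_register.py
    print(Path(script_name).stem)  # tdml_udf_name_to_upper_udf_type_VARCHAR_register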
@@ -357,35 +369,16 @@ class _TableOperatorUtils:
                                     "templates")
         # Get the template.
         template = {TableOperatorConstants.APPLY_OP.value: TableOperatorConstants.APPLY_TEMPLATE.value,
-                    TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value
+                    TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value,
+                    TableOperatorConstants.REGISTER_OP.value: TableOperatorConstants.REGISTER_TEMPLATE.value }
         template_name = template.get(self.operation, TableOperatorConstants.MAP_TEMPLATE.value)
         # Write to the script based on the template.
         try:
             with open(os.path.join(template_dir, template_name), 'r') as input_file:
                 with open(self.script_path, 'w') as output_file:
                     if self.operation == TableOperatorConstants.UDF_OP.value:
-
-                        # Function can have udf as decorator. Remove that.
-                        # The below notation
-                        # @udf
-                        # def to_upper(s):
-                        #     return s.upper()
-                        # Then source code will be as it is.
-                        # But if below notation is used,
-                        # f = udf(to_upper)
-                        # Then source code will not have udf.
-                        # So, remove first line if it comes with first notation.
-                        # For both notations if in starting function defination have any extra space. Remove that.
-                        # If multiple UDF's are there append them as a single string.
 
-                        user_function_code =
-                        for udf_code in self.user_function:
-                            udf_code = getsource(udf_code)
-                            udf_code = udf_code.lstrip()
-                            if udf_code.startswith("@"):
-                                udf_code = udf_code[udf_code.find("\n")+1: ].lstrip()
-                            user_function_code += udf_code + '\n'
-
+                        user_function_code = UtilFuncs._func_to_string(self.user_function)
                         output_file.write(input_file.read().format(
                             DELIMITER=self.delimiter,
                             QUOTECHAR=self.quotechar,
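Note: the inlined decorator-stripping loop removed above is now a single call to UtilFuncs._func_to_string. Based on the removed code, the helper presumably behaves like the sketch below; this is an inference from the old inline logic, not the helper's actual source:

    from inspect import getsource

    def _func_to_string(functions):
        # Concatenate the source of each UDF into one string. With the
        # "@udf" decorator notation, getsource() includes the decorator
        # line; drop it along with any leading indentation.
        user_function_code = ""
        for fn in functions:
            udf_code = getsource(fn).lstrip()
            if udf_code.startswith("@"):
                udf_code = udf_code[udf_code.find("\n") + 1:].lstrip()
            user_function_code += udf_code + "\n"
        return user_function_code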
@@ -396,6 +389,13 @@ class _TableOperatorUtils:
                             COLUMNS_DEFINITIONS=json.dumps(self.columns_definitions),
                             OUTPUT_TYPE_CONVERTERS=json.dumps(self.output_type_converters)
                         ))
+                    elif self.operation == TableOperatorConstants.REGISTER_OP.value:
+                        # Get the source code of the user function.
+                        user_function_code = UtilFuncs._func_to_string(self.user_function)
+                        output_file.write(input_file.read().format(
+                            FUNCTION_DEFINITION=user_function_code,
+                            FUNCTION_NAME = self.user_function[0].__name__
+                        ))
                     else:
                         # prepare script file from template file for maprow and mappartition.
                         output_file.write(
@@ -494,7 +494,6 @@ class _TableOperatorUtils:
         script_name = script_alias # alias now contains extension also.
 
         # Extract the base name without extension.
-        from pathlib import Path
         script_base_name = Path(script_alias).stem
         return script_entry, script_alias, script_name, script_base_name
 
teradataml/table_operators/templates/dataframe_register.template ADDED

@@ -0,0 +1,69 @@
+import json
+import sys, csv
+import datetime
+import urllib.parse
+
+td_buffer = {{}}
+
+
+{FUNCTION_DEFINITION}
+
+# Decode the URL encoded string and store it back as dictionary.
+dec = urllib.parse.unquote_plus(sys.argv[1])
+script_data = json.loads(dec)
+
+# Information that is required to help with the script usage.
+# The delimiter to use with the input and output text.
+delimiter = script_data["delimiter"]
+# The quotechar to use.
+quotechar = script_data["qoutechar"]
+# The names of columns in the input teradataml DataFrame.
+_input_columns = script_data["input_cols"]
+# The names of columns in the output teradataml DataFrame.
+_output_columns = script_data["output_cols"]
+# The types of columns in the input/output teradataml DataFrame.
+# The mapper of output column name to function arguments
+function_args = script_data["function_args"]
+# The definition for new columns in output.
+columns_definitions = {{_output_columns[-1]: "{FUNCTION_NAME}"}}
+output_type_converters = script_data["output_type_converters"]
+for k,v in output_type_converters.items():
+    if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
+        output_type_converters[k] = 'str'
+output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
+
+
+
+# The entry point to the script.
+if __name__ == "__main__":
+
+    records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
+    for record in records:
+        record = dict(zip(_input_columns, record))
+        out_rec = []
+        for column in _output_columns:
+
+            # If it is a new column, get the value from definition.
+            if column in columns_definitions:
+                f_args = tuple()
+                # Convert the argument types first.
+                for v in function_args[column]:
+                    if v in _input_columns:
+                        c_type_ = output_type_converters.get(v)
+                        if record[v]:
+                            # If it is a float, replace the empty character.
+                            if c_type_.__name__ == 'float':
+                                arg = output_type_converters.get(v)(record[v].replace(' ', ''))
+                            else:
+                                arg = output_type_converters.get(v)(record[v])
+                        else:
+                            arg = record[v]
+                    else:
+                        arg = v
+                    f_args = f_args + (arg, )
+                func_ = globals()[columns_definitions[column]]
+                out_rec.append(output_type_converters[column](func_(*f_args)))
+            else:
+                out_rec.append(record[column])
+
+        print("{{}}".format(delimiter).join((str(i) for i in out_rec)))
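Note: in the new dataframe_register.template, doubled braces ({{}}) survive str.format() as literal braces, while {FUNCTION_DEFINITION} and {FUNCTION_NAME} are substituted when the script is generated (see the REGISTER_OP branch in table_operator_util.py above). A minimal illustration with a hypothetical UDF and an abbreviated stand-in for the template body:

    template = (
        "td_buffer = {{}}\n"
        "\n"
        "{FUNCTION_DEFINITION}\n"
        'columns_definitions = {{_output_columns[-1]: "{FUNCTION_NAME}"}}\n'
    )

    udf_source = "def to_upper(s):\n    return s.upper()\n"  # hypothetical UDF source
    print(template.format(FUNCTION_DEFINITION=udf_source, FUNCTION_NAME="to_upper"))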
teradataml/utils/dtypes.py CHANGED
@@ -293,10 +293,57 @@ class _DtypesMappers:
               VARCHAR: lambda x: "{0},{1}".format(x.__class__.__name__, x.length)
               }
 
+    # Holds mapping between string representation of teradatasqlalchemy type
+    # and actual teradatasqlalchemy type.
+    DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER = {
+        "CHAR": CHAR,
+        "VARCHAR": VARCHAR,
+
+        "BYTEINT": BYTEINT,
+        "SMALLINT": SMALLINT,
+        "INTEGER": INTEGER,
+        "BIGINT": BIGINT,
+
+        "REAL": FLOAT,
+        "FLOAT": FLOAT,
+        "DOUBLE": FLOAT,
+        "DECIMAL": DECIMAL,
+        "NUMBER": NUMBER,
+
+        "DATE": DATE,
+        "TIME": TIME,
+        "TIMESTAMP": TIMESTAMP,
+        "TIMESTAMP_WTZ": TIMESTAMP,
+
+        "BYTE": BYTE,
+        "VARBYTE": VARBYTE,
+        "BLOB": BLOB,
+        # TODO: Add CLOB type when support is added from OTF.
+
+        # TODO: Check these types when corresponding data type support
+        # is available from OTF support or not.
+        "INTERVAL_YEAR": INTERVAL_YEAR,
+        "INTERVAL_YTM": INTERVAL_YEAR_TO_MONTH,
+        "INTERVAL_MONTH": INTERVAL_MONTH,
+        "INTERVAL_DAY": INTERVAL_DAY,
+
+        "INTERVAL_DTH": INTERVAL_DAY_TO_HOUR,
+        "INTERVAL_DTM": INTERVAL_DAY_TO_MINUTE,
+        "INTERVAL_DTS": INTERVAL_DAY_TO_SECOND,
+        "INTERVAL_HOUR": INTERVAL_HOUR,
+        "INTERVAL_HTM": INTERVAL_HOUR_TO_MINUTE,
+        "INTERVAL_HTS": INTERVAL_HOUR_TO_SECOND,
+        "INTERVAL_MINUTE": INTERVAL_MINUTE,
+        "INTERVAL_MTS": INTERVAL_MINUTE_TO_SECOND,
+        "INTERVAL_SECOND": INTERVAL_SECOND
+    }
+
+
 class _SuppArgTypes:
     VAL_ARG_DATATYPE = (str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
                         TIMESTAMP, VARCHAR)
 
+
 class _Dtypes:
 
     @staticmethod
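Note: several source strings intentionally collapse to one teradatasqlalchemy type above (REAL, FLOAT, and DOUBLE all map to FLOAT; TIMESTAMP_WTZ maps to TIMESTAMP). A hedged lookup example, assuming the mapper is reached through the internal _DtypesMappers class as defined in this diff:

    from teradataml.utils.dtypes import _DtypesMappers  # internal API per this diff

    # "DOUBLE" resolves to the teradatasqlalchemy FLOAT type class.
    td_type = _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER["DOUBLE"]
    column_type = td_type()  # instantiate for use wherever a type instance is expected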
@@ -641,11 +688,13 @@ class _Dtypes:
 
         """
         from teradataml.dataframe.dataframe import TDSeries, TDMatrix, TDGenSeries, TDAnalyticResult
+        from teradataml.store.feature_store.feature_store import Feature
         _DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.update({"SERIES": TDSeries,
                                                              "MATRIX": TDMatrix,
                                                              "ART": TDAnalyticResult,
-                                                             "GENSERIES": TDGenSeries
-
+                                                             "GENSERIES": TDGenSeries,
+                                                             "COLUMN": (str, Feature),
+                                                             "COLUMNS": (str, Feature)})
 
         return _DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.get(json_td_type.upper())
 
teradataml/utils/internal_buffer.py CHANGED

@@ -82,3 +82,21 @@ class _InternalBuffer:
         """
         if key in cls.__data:
             return cls.__data.get(key)
+
+    @classmethod
+    def remove_key(cls, key):
+        """
+        DESCRIPTION:
+            Remove a particular key from the internal buffer.
+
+        RETURNS:
+            None
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Remove all json objects from _InternalBuffer.
+            _InternalBuffer.remove_key("vs_session_id")
+        """
+        del cls.__data[key]
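Note: remove_key deletes with a plain del, so unlike get() (which falls through and returns None for a missing key, per the lines above) it would raise KeyError when the key is absent. A hedged round trip; the kwargs-style add() setter is an assumption, since only get() and remove_key() appear in this diff:

    from teradataml.utils.internal_buffer import _InternalBuffer

    _InternalBuffer.add(vs_session_id="abc")     # assumption: kwargs-style setter
    assert _InternalBuffer.get("vs_session_id") == "abc"
    _InternalBuffer.remove_key("vs_session_id")  # plain `del`: KeyError if absent
    assert _InternalBuffer.get("vs_session_id") is None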
teradataml/utils/validators.py CHANGED
@@ -1,3 +1,4 @@
+import enum
 import numbers
 import os
 import pandas as pd
@@ -11,6 +12,8 @@ from teradataml.options.configure import configure
 from teradataml.dataframe.sql_interfaces import ColumnExpression
 from functools import wraps, reduce
 
+from teradataml.utils.internal_buffer import _InternalBuffer
+
 
 def skip_validation():
     """
@@ -283,7 +286,8 @@ class _Validators:
 
     @staticmethod
     @skip_validation()
-    def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False
+    def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False,
+                                                 case_insensitive=False):
         """
         Function to check whether column names in columns are present in given dataframe or not.
         This function is used currently only for Analytics wrappers.
@@ -309,12 +313,19 @@ class _Validators:
                 Specifies the name of the dataframe argument.
                 Types: str
 
-
+            is_partition_arg:
                 Optional Argument.
                 Specifies a bool argument notifying, whether argument being validate is
                 Partition argument or not.
                 Types: bool
 
+            case_insensitive:
+                Optional Argument.
+                Specifies a bool argument notifying, whether to check column names
+                in case-insensitive manner or not.
+                Default Value: False
+                Types: bool
+
         RAISES:
             TeradataMlException - TDMLDF_COLUMN_IN_ARG_NOT_FOUND column(s) does not exist in a dataframe.
 
@@ -356,7 +367,7 @@ class _Validators:
             try:
                 # Check if its a sinlge column with one separator. For e.g. column:A.
                 # If yes, just continue.
-                _Validators._validate_column_exists_in_dataframe(column, data._metaexpr)
+                _Validators._validate_column_exists_in_dataframe(column, data._metaexpr, case_insensitive=case_insensitive)
                 continue
             except:
                 # User has provided range value.
@@ -379,7 +390,8 @@ class _Validators:
                 total_columns.append(column)
 
         return _Validators._validate_column_exists_in_dataframe(total_columns, data._metaexpr, column_arg=column_arg,
-                                                                data_arg=data_arg)
+                                                                data_arg=data_arg, case_insensitive=case_insensitive)
+
 
     @staticmethod
     @skip_validation()
@@ -545,7 +557,7 @@ class _Validators:
             raise TypeError("Third element in argument information matrix should be bool.")
 
         if not (isinstance(args[3], tuple) or isinstance(args[3], type) or
-                isinstance(args[3], (_ListOf, _TupleOf))):
+                isinstance(args[3], (_ListOf, _TupleOf)) or isinstance(args[3], enum.EnumMeta)):
             err_msg = "Fourth element in argument information matrix should be a 'tuple of types' or 'type' type."
             raise TypeError(err_msg)
 
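Note: the widened isinstance check above lets an Enum class itself appear as the expected-type element of an argument-information row, since Enum classes are instances of enum.EnumMeta. A standalone check with a hypothetical enum:

    import enum

    class Style(enum.Enum):  # hypothetical permitted-values enum
        CSV = "CSV"

    # An Enum *class* is an instance of enum.EnumMeta, so it now passes the
    # fourth-element type check in the argument-information matrix.
    print(isinstance(Style, enum.EnumMeta))  # True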
@@ -1395,7 +1407,8 @@ class _Validators:
 
     @staticmethod
     @skip_validation()
-    def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True
+    def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True,
+                                         case_insensitive=False):
         """
         Internal function to validate the column existence and type of an input DataFrame column against
         a list of unexpected types.
@@ -1461,7 +1474,7 @@ class _Validators:
 
         # Check for column existence.
         if check_exist:
-            _Validators._validate_column_exists_in_dataframe(col, df._metaexpr)
+            _Validators._validate_column_exists_in_dataframe(col, df._metaexpr, case_insensitive=case_insensitive)
 
         if isinstance(df[col].type, unexpected_types):
             if raise_error:
@@ -2274,4 +2287,82 @@ class _Validators:
                                                  MessageCodes.INVALID_ARG_VALUE).format(ip_address, "ip_address",
                                                  'of four numbers (each between 0 and 255) separated by periods'))
 
-        return True
+        return True
+
+
+    @staticmethod
+    @skip_validation()
+    def _check_auth_token(func_name):
+        """
+        DESCRIPTION:
+            Check if the user has set the authentication token.
+
+        PARAMETERS:
+            func_name:
+                Required Argument.
+                Specifies the function name where the authentication token is required.
+                Types: str
+
+        RAISES:
+            TeradataMLException
+
+        RETURNS:
+            None.
+
+        EXAMPLES:
+            >>> _Validators._check_auth_token("udf")
+        """
+        if _InternalBuffer.get("auth_token") is None:
+            raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS,\
+                                                           'Auth Token', func_name,
+                                                           'set_auth_token'),
+                                      MessageCodes.SET_REQUIRED_PARAMS)
+
+        return True
+
+    @staticmethod
+    def _check_required_params(arg_value, arg_name, caller_func_name, target_func_name):
+        """
+        DESCRIPTION:
+            Check if the required argument is not None.
+
+        PARAMETERS:
+            arg_value:
+                Required Argument.
+                Specifies the argument value to be
+                checked for non None values.
+                Types: str, float, int, bool
+
+            arg_name:
+                Required Argument.
+                Specifies the argument name.
+                Types: str
+
+            caller_func_name:
+                Required Argument.
+                Specifies the function name which calls this function.
+                This is required for the error message.
+                Types: str
+
+            target_func_name:
+                Required Argument.
+                Specifies the function name which the user needs to call
+                so that the error is fixed.
+                This is required for the error message.
+                Types: str
+
+        RAISES:
+            TeradataMLException
+
+        RETURNS:
+            True.
+
+        EXAMPLES:
+            >>> _Validators._check_required_params("udf", "arg_name")
+        """
+        if arg_value is None:
+            raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS, \
+                                                           arg_name, caller_func_name,
+                                                           target_func_name),
+                                      MessageCodes.SET_REQUIRED_PARAMS)
+        return True
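Note: both new guards raise SET_REQUIRED_PARAMS and point the user at the call that fixes the problem. A hedged usage sketch; set_auth_token is assumed (from the error text above) to be what populates the "auth_token" buffer entry, and its signature is not shown in this diff:

    from teradataml.common.exceptions import TeradataMlException
    from teradataml.utils.validators import _Validators

    try:
        _Validators._check_auth_token("udf")  # passes only once auth_token is buffered
    except TeradataMlException as err:
        # The message directs the user to call set_auth_token first.
        print(err)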