teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +196 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +79 -4
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +1 -0
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/automl/data_preparation.py +3 -2
- teradataml/automl/feature_engineering.py +15 -7
- teradataml/automl/model_training.py +39 -33
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +35 -0
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +8 -2
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +25 -3
- teradataml/common/utils.py +134 -9
- teradataml/context/context.py +20 -10
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/dataframe.py +543 -175
- teradataml/dataframe/functions.py +553 -25
- teradataml/dataframe/sql.py +184 -15
- teradataml/dbutils/dbutils.py +556 -18
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
- teradataml/options/__init__.py +7 -23
- teradataml/options/configure.py +29 -3
- teradataml/scriptmgmt/UserEnv.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +74 -21
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +33 -1
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
|
@@ -481,6 +481,9 @@ class QueryGenerator:
|
|
|
481
481
|
return configure.read_nos_function_mapping.upper()
|
|
482
482
|
elif "WriteNOS".lower() == function_name.lower():
|
|
483
483
|
return configure.write_nos_function_mapping.upper()
|
|
484
|
+
# If Table Operator function is IMAGE2MATRIX, then return alias name as TD_IMAGE2MATRIX.
|
|
485
|
+
elif "IMAGE2MATRIX".lower() == function_name.lower():
|
|
486
|
+
return "TD_IMAGE2MATRIX"
|
|
484
487
|
|
|
485
488
|
engine_name = UtilFuncs._get_engine_name(self._engine)
|
|
486
489
|
|
|
@@ -231,7 +231,9 @@ class TableOperatorQueryGenerator(QueryGenerator):
|
|
|
231
231
|
using_clause = ""
|
|
232
232
|
# If the function is a NOS function, then USING clause is needed.
|
|
233
233
|
if self._function_name.lower() in [configure.write_nos_function_mapping.lower(),
|
|
234
|
-
configure.read_nos_function_mapping.lower()
|
|
234
|
+
configure.read_nos_function_mapping.lower(),
|
|
235
|
+
"td_image2matrix"
|
|
236
|
+
]:
|
|
235
237
|
using_clause = "USING"
|
|
236
238
|
invocation_sql = "{0}\n\t{1}{2}".format(invocation_sql, using_clause, self.__OTHER_ARG_CLAUSE)
|
|
237
239
|
|
|
@@ -24,6 +24,7 @@ from teradataml.utils.utils import execute_sql
|
|
|
24
24
|
from teradataml.utils.validators import _Validators
|
|
25
25
|
from functools import partial
|
|
26
26
|
from inspect import isfunction, getsource
|
|
27
|
+
from pathlib import Path
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class _TableOperatorUtils:
|
|
@@ -281,7 +282,8 @@ class _TableOperatorUtils:
|
|
|
281
282
|
"""
|
|
282
283
|
# Validate the user defined function.
|
|
283
284
|
|
|
284
|
-
if self.operation
|
|
285
|
+
if self.operation in [TableOperatorConstants.UDF_OP.value,\
|
|
286
|
+
TableOperatorConstants.REGISTER_OP.value]:
|
|
285
287
|
for udf_function in self.user_function:
|
|
286
288
|
if not isfunction(udf_function):
|
|
287
289
|
raise TypeError(Messages.get_message(
|
|
@@ -330,20 +332,30 @@ class _TableOperatorUtils:
|
|
|
330
332
|
EXAMPLES:
|
|
331
333
|
self.__create_user_script()
|
|
332
334
|
"""
|
|
333
|
-
#
|
|
334
|
-
#
|
|
335
|
-
# It has the format "<
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
335
|
+
# If operation is register, then generate script name based on the
|
|
336
|
+
# user function name and return type.
|
|
337
|
+
# It has the format "tdml_udf_name_<registered_name>_udf_type_<return_type>_register.py"
|
|
338
|
+
if self.operation == TableOperatorConstants.REGISTER_OP.value:
|
|
339
|
+
registered_name = list(self.returns.keys())[0]
|
|
340
|
+
return_type = self.returns[registered_name]
|
|
341
|
+
self.script_name = "tdml_udf_name_{}_udf_type_{}_register.py".format(registered_name, return_type)
|
|
342
|
+
self.script_base_name = Path(self.script_name).stem
|
|
343
|
+
else:
|
|
344
|
+
# Generate script name and alias, and add entry to a Garbage Collector.
|
|
345
|
+
# script_entry is the string that is added to Garbage collector.
|
|
346
|
+
# It has the format "<databasename>"."<file_id>".
|
|
347
|
+
self.script_entry, self.script_alias, self.script_name, self.script_base_name = self.__get_script_name()
|
|
348
|
+
|
|
349
|
+
if self.operation not in [TableOperatorConstants.UDF_OP.value, TableOperatorConstants.REGISTER_OP.value]:
|
|
350
|
+
# Get the converters to use with pandas.read_csv, and to correctly
|
|
351
|
+
# typecast the numeric data.
|
|
352
|
+
python_input_col_types = [UtilFuncs._teradata_type_to_python_type(col.type)
|
|
353
|
+
for col in self.data._metaexpr.c]
|
|
354
|
+
input_converters = UtilFuncs._get_pandas_converters(python_input_col_types)
|
|
355
|
+
|
|
356
|
+
python_output_col_types = [UtilFuncs._teradata_type_to_python_type(type_)
|
|
357
|
+
for type_ in list(self.returns.values())]
|
|
358
|
+
output_converters = UtilFuncs._get_pandas_converters(python_output_col_types)
|
|
347
359
|
|
|
348
360
|
# Create script in .teradataml directory.
|
|
349
361
|
script_dir = GarbageCollector._get_temp_dir_name()
|
|
@@ -357,35 +369,16 @@ class _TableOperatorUtils:
|
|
|
357
369
|
"templates")
|
|
358
370
|
# Get the template.
|
|
359
371
|
template = {TableOperatorConstants.APPLY_OP.value: TableOperatorConstants.APPLY_TEMPLATE.value,
|
|
360
|
-
TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value
|
|
372
|
+
TableOperatorConstants.UDF_OP.value: TableOperatorConstants.UDF_TEMPLATE.value,
|
|
373
|
+
TableOperatorConstants.REGISTER_OP.value: TableOperatorConstants.REGISTER_TEMPLATE.value }
|
|
361
374
|
template_name = template.get(self.operation, TableOperatorConstants.MAP_TEMPLATE.value)
|
|
362
375
|
# Write to the script based on the template.
|
|
363
376
|
try:
|
|
364
377
|
with open(os.path.join(template_dir, template_name), 'r') as input_file:
|
|
365
378
|
with open(self.script_path, 'w') as output_file:
|
|
366
379
|
if self.operation == TableOperatorConstants.UDF_OP.value:
|
|
367
|
-
|
|
368
|
-
# Function can have udf as decorator. Remove that.
|
|
369
|
-
# The below notation
|
|
370
|
-
# @udf
|
|
371
|
-
# def to_upper(s):
|
|
372
|
-
# return s.upper()
|
|
373
|
-
# Then source code will be as it is.
|
|
374
|
-
# But if below notation is used,
|
|
375
|
-
# f = udf(to_upper)
|
|
376
|
-
# Then source code will not have udf.
|
|
377
|
-
# So, remove first line if it comes with first notation.
|
|
378
|
-
# For both notations if in starting function defination have any extra space. Remove that.
|
|
379
|
-
# If multiple UDF's are there append them as a single string.
|
|
380
380
|
|
|
381
|
-
user_function_code =
|
|
382
|
-
for udf_code in self.user_function:
|
|
383
|
-
udf_code = getsource(udf_code)
|
|
384
|
-
udf_code = udf_code.lstrip()
|
|
385
|
-
if udf_code.startswith("@"):
|
|
386
|
-
udf_code = udf_code[udf_code.find("\n")+1: ].lstrip()
|
|
387
|
-
user_function_code += udf_code + '\n'
|
|
388
|
-
|
|
381
|
+
user_function_code = UtilFuncs._func_to_string(self.user_function)
|
|
389
382
|
output_file.write(input_file.read().format(
|
|
390
383
|
DELIMITER=self.delimiter,
|
|
391
384
|
QUOTECHAR=self.quotechar,
|
|
@@ -396,6 +389,13 @@ class _TableOperatorUtils:
|
|
|
396
389
|
COLUMNS_DEFINITIONS=json.dumps(self.columns_definitions),
|
|
397
390
|
OUTPUT_TYPE_CONVERTERS=json.dumps(self.output_type_converters)
|
|
398
391
|
))
|
|
392
|
+
elif self.operation == TableOperatorConstants.REGISTER_OP.value:
|
|
393
|
+
# Get the source code of the user function.
|
|
394
|
+
user_function_code = UtilFuncs._func_to_string(self.user_function)
|
|
395
|
+
output_file.write(input_file.read().format(
|
|
396
|
+
FUNCTION_DEFINITION=user_function_code,
|
|
397
|
+
FUNCTION_NAME = self.user_function[0].__name__
|
|
398
|
+
))
|
|
399
399
|
else:
|
|
400
400
|
# prepare script file from template file for maprow and mappartition.
|
|
401
401
|
output_file.write(
|
|
@@ -494,7 +494,6 @@ class _TableOperatorUtils:
|
|
|
494
494
|
script_name = script_alias # alias now contains extension also.
|
|
495
495
|
|
|
496
496
|
# Extract the base name without extension.
|
|
497
|
-
from pathlib import Path
|
|
498
497
|
script_base_name = Path(script_alias).stem
|
|
499
498
|
return script_entry, script_alias, script_name, script_base_name
|
|
500
499
|
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sys, csv
|
|
3
|
+
import datetime
|
|
4
|
+
import urllib.parse
|
|
5
|
+
|
|
6
|
+
td_buffer = {{}}
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
{FUNCTION_DEFINITION}
|
|
10
|
+
|
|
11
|
+
# Decode the URL encoded string and store it back as dictionary.
|
|
12
|
+
dec = urllib.parse.unquote_plus(sys.argv[1])
|
|
13
|
+
script_data = json.loads(dec)
|
|
14
|
+
|
|
15
|
+
# Information that is required to help with the script usage.
|
|
16
|
+
# The delimiter to use with the input and output text.
|
|
17
|
+
delimiter = script_data["delimiter"]
|
|
18
|
+
# The quotechar to use.
|
|
19
|
+
quotechar = script_data["qoutechar"]
|
|
20
|
+
# The names of columns in the input teradataml DataFrame.
|
|
21
|
+
_input_columns = script_data["input_cols"]
|
|
22
|
+
# The names of columns in the output teradataml DataFrame.
|
|
23
|
+
_output_columns = script_data["output_cols"]
|
|
24
|
+
# The types of columns in the input/output teradataml DataFrame.
|
|
25
|
+
# The mapper of output column name to function arguments
|
|
26
|
+
function_args = script_data["function_args"]
|
|
27
|
+
# The definition for new columns in output.
|
|
28
|
+
columns_definitions = {{_output_columns[-1]: "{FUNCTION_NAME}"}}
|
|
29
|
+
output_type_converters = script_data["output_type_converters"]
|
|
30
|
+
for k,v in output_type_converters.items():
|
|
31
|
+
if v == 'datetime.date' or v == 'datetime.time' or v == 'datetime.datetime':
|
|
32
|
+
output_type_converters[k] = 'str'
|
|
33
|
+
output_type_converters = {{k:getattr(__builtins__, v) for k,v in output_type_converters.items()}}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# The entry point to the script.
|
|
38
|
+
if __name__ == "__main__":
|
|
39
|
+
|
|
40
|
+
records = csv.reader(sys.stdin.readlines(), delimiter=delimiter, quotechar=quotechar)
|
|
41
|
+
for record in records:
|
|
42
|
+
record = dict(zip(_input_columns, record))
|
|
43
|
+
out_rec = []
|
|
44
|
+
for column in _output_columns:
|
|
45
|
+
|
|
46
|
+
# If it is a new column, get the value from definition.
|
|
47
|
+
if column in columns_definitions:
|
|
48
|
+
f_args = tuple()
|
|
49
|
+
# Convert the argument types first.
|
|
50
|
+
for v in function_args[column]:
|
|
51
|
+
if v in _input_columns:
|
|
52
|
+
c_type_ = output_type_converters.get(v)
|
|
53
|
+
if record[v]:
|
|
54
|
+
# If it is a float, replace the empty character.
|
|
55
|
+
if c_type_.__name__ == 'float':
|
|
56
|
+
arg = output_type_converters.get(v)(record[v].replace(' ', ''))
|
|
57
|
+
else:
|
|
58
|
+
arg = output_type_converters.get(v)(record[v])
|
|
59
|
+
else:
|
|
60
|
+
arg = record[v]
|
|
61
|
+
else:
|
|
62
|
+
arg = v
|
|
63
|
+
f_args = f_args + (arg, )
|
|
64
|
+
func_ = globals()[columns_definitions[column]]
|
|
65
|
+
out_rec.append(output_type_converters[column](func_(*f_args)))
|
|
66
|
+
else:
|
|
67
|
+
out_rec.append(record[column])
|
|
68
|
+
|
|
69
|
+
print("{{}}".format(delimiter).join((str(i) for i in out_rec)))
|
teradataml/utils/dtypes.py
CHANGED
|
@@ -641,11 +641,13 @@ class _Dtypes:
|
|
|
641
641
|
|
|
642
642
|
"""
|
|
643
643
|
from teradataml.dataframe.dataframe import TDSeries, TDMatrix, TDGenSeries, TDAnalyticResult
|
|
644
|
+
from teradataml.store.feature_store.feature_store import Feature
|
|
644
645
|
_DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.update({"SERIES": TDSeries,
|
|
645
646
|
"MATRIX": TDMatrix,
|
|
646
647
|
"ART": TDAnalyticResult,
|
|
647
|
-
"GENSERIES": TDGenSeries
|
|
648
|
-
|
|
648
|
+
"GENSERIES": TDGenSeries,
|
|
649
|
+
"COLUMN": (str, Feature),
|
|
650
|
+
"COLUMNS": (str, Feature)})
|
|
649
651
|
|
|
650
652
|
return _DtypesMappers.JSON_TD_TO_PYTHON_TYPE_MAPPER.get(json_td_type.upper())
|
|
651
653
|
|
teradataml/utils/validators.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import enum
|
|
1
2
|
import numbers
|
|
2
3
|
import os
|
|
3
4
|
import pandas as pd
|
|
@@ -11,6 +12,8 @@ from teradataml.options.configure import configure
|
|
|
11
12
|
from teradataml.dataframe.sql_interfaces import ColumnExpression
|
|
12
13
|
from functools import wraps, reduce
|
|
13
14
|
|
|
15
|
+
from teradataml.utils.internal_buffer import _InternalBuffer
|
|
16
|
+
|
|
14
17
|
|
|
15
18
|
def skip_validation():
|
|
16
19
|
"""
|
|
@@ -545,7 +548,7 @@ class _Validators:
|
|
|
545
548
|
raise TypeError("Third element in argument information matrix should be bool.")
|
|
546
549
|
|
|
547
550
|
if not (isinstance(args[3], tuple) or isinstance(args[3], type) or
|
|
548
|
-
isinstance(args[3], (_ListOf, _TupleOf))):
|
|
551
|
+
isinstance(args[3], (_ListOf, _TupleOf)) or isinstance(args[3], enum.EnumMeta)):
|
|
549
552
|
err_msg = "Fourth element in argument information matrix should be a 'tuple of types' or 'type' type."
|
|
550
553
|
raise TypeError(err_msg)
|
|
551
554
|
|
|
@@ -2274,4 +2277,33 @@ class _Validators:
|
|
|
2274
2277
|
MessageCodes.INVALID_ARG_VALUE).format(ip_address, "ip_address",
|
|
2275
2278
|
'of four numbers (each between 0 and 255) separated by periods'))
|
|
2276
2279
|
|
|
2280
|
+
return True
|
|
2281
|
+
|
|
2282
|
+
|
|
2283
|
+
@staticmethod
|
|
2284
|
+
@skip_validation()
|
|
2285
|
+
def _check_auth_token(func_name):
|
|
2286
|
+
"""
|
|
2287
|
+
DESCRIPTION:
|
|
2288
|
+
Check if the user has set the authentication token.
|
|
2289
|
+
|
|
2290
|
+
PARAMETERS:
|
|
2291
|
+
func_name:
|
|
2292
|
+
Required Argument.
|
|
2293
|
+
Specifies the function name where the authentication token is required.
|
|
2294
|
+
Types: str
|
|
2295
|
+
|
|
2296
|
+
RAISES:
|
|
2297
|
+
TeradataMLException
|
|
2298
|
+
|
|
2299
|
+
RETURNS:
|
|
2300
|
+
None.
|
|
2301
|
+
|
|
2302
|
+
EXAMPLES:
|
|
2303
|
+
>>> _Validators._check_auth_token("udf")
|
|
2304
|
+
"""
|
|
2305
|
+
if _InternalBuffer.get("auth_token") is None:
|
|
2306
|
+
raise TeradataMlException(Messages.get_message(MessageCodes.AUTH_TOKEN_REQUIRED,\
|
|
2307
|
+
func_name), MessageCodes.AUTH_TOKEN_REQUIRED)
|
|
2308
|
+
|
|
2277
2309
|
return True
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: teradataml
|
|
3
|
-
Version: 20.0.0.
|
|
3
|
+
Version: 20.0.0.3
|
|
4
4
|
Summary: Teradata Vantage Python package for Advanced Analytics
|
|
5
5
|
Home-page: http://www.teradata.com/
|
|
6
6
|
Author: Teradata Corporation
|
|
@@ -17,8 +17,8 @@ Classifier: Topic :: Database :: Front-Ends
|
|
|
17
17
|
Classifier: License :: Other/Proprietary License
|
|
18
18
|
Requires-Python: >=3.8
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
|
-
Requires-Dist: teradatasql (>=
|
|
21
|
-
Requires-Dist: teradatasqlalchemy (>=20.0.0.
|
|
20
|
+
Requires-Dist: teradatasql (>=20.0.0.19)
|
|
21
|
+
Requires-Dist: teradatasqlalchemy (>=20.0.0.3)
|
|
22
22
|
Requires-Dist: pandas (>=0.22)
|
|
23
23
|
Requires-Dist: psutil
|
|
24
24
|
Requires-Dist: requests (>=2.25.1)
|
|
@@ -28,6 +28,7 @@ Requires-Dist: imbalanced-learn (>=0.8.0)
|
|
|
28
28
|
Requires-Dist: pyjwt (>=2.8.0)
|
|
29
29
|
Requires-Dist: cryptography (>=42.0.5)
|
|
30
30
|
Requires-Dist: sqlalchemy (>=2.0)
|
|
31
|
+
Requires-Dist: lightgbm (>=3.3.3)
|
|
31
32
|
|
|
32
33
|
## Teradata Python package for Advanced Analytics.
|
|
33
34
|
|
|
@@ -47,6 +48,187 @@ Copyright 2024, Teradata. All Rights Reserved.
|
|
|
47
48
|
* [License](#license)
|
|
48
49
|
|
|
49
50
|
## Release Notes:
|
|
51
|
+
|
|
52
|
+
#### teradataml 20.00.00.03
|
|
53
|
+
|
|
54
|
+
* teradataml no longer supports setting the `auth_token` using `set_config_params()`. Users should use `set_auth_token()` to set the token.
|
|
55
|
+
|
|
56
|
+
* ##### New Features/Functionality
|
|
57
|
+
* ###### teradataml: DataFrame
|
|
58
|
+
* New Function
|
|
59
|
+
* `alias()` - Creates a DataFrame with alias name.
|
|
60
|
+
* New Properties
|
|
61
|
+
* `db_object_name` - Get the underlying database object name, on which DataFrame is created.
|
|
62
|
+
|
|
63
|
+
* ###### teradataml: GeoDataFrame
|
|
64
|
+
* New Function
|
|
65
|
+
* `alias()` - Creates a GeoDataFrame with alias name.
|
|
66
|
+
|
|
67
|
+
* ###### teradataml: DataFrameColumn a.k.a. ColumnExpression
|
|
68
|
+
* _Arithmetic Functions_
|
|
69
|
+
* `DataFrameColumn.isnan()` - Function evaluates expression to determine if the floating-point
|
|
70
|
+
argument is a NaN (Not-a-Number) value.
|
|
71
|
+
* `DataFrameColumn.isinf()` - Function evaluates expression to determine if the floating-point
|
|
72
|
+
argument is an infinite number.
|
|
73
|
+
* `DataFrameColumn.isfinite()` - Function evaluates expression to determine if it is a finite
|
|
74
|
+
floating value.
|
|
75
|
+
|
|
76
|
+
* ###### FeatureStore - handles feature management within the Vantage environment
|
|
77
|
+
* FeatureStore Components
|
|
78
|
+
* Feature - Represents a feature which is used in ML Modeling.
|
|
79
|
+
* Entity - Represents the columns which serves as uniqueness for the data used in ML Modeling.
|
|
80
|
+
* DataSource - Represents the source of Data.
|
|
81
|
+
* FeatureGroup - Collection of Feature, Entity and DataSource.
|
|
82
|
+
* Methods
|
|
83
|
+
* `apply()` - Adds Feature, Entity, DataSource to a FeatureGroup.
|
|
84
|
+
* `from_DataFrame()` - Creates a FeatureGroup from teradataml DataFrame.
|
|
85
|
+
* `from_query()` - Creates a FeatureGroup using a SQL query.
|
|
86
|
+
* `remove()` - Removes Feature, Entity, or DataSource from a FeatureGroup.
|
|
87
|
+
* `reset_labels()` - Removes the labels assigned to the FeatureGroup, that are set using `set_labels()`.
|
|
88
|
+
* `set_labels()` - Sets the Features as labels for a FeatureGroup.
|
|
89
|
+
* Properties
|
|
90
|
+
* `features` - Get the features of a FeatureGroup.
|
|
91
|
+
* `labels` - Get the labels of FeatureGroup.
|
|
92
|
+
* FeatureStore
|
|
93
|
+
* Methods
|
|
94
|
+
* `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
|
|
95
|
+
* `archive_data_source()` - Archives a specified DataSource from a FeatureStore.
|
|
96
|
+
* `archive_entity()` - Archives a specified Entity from a FeatureStore.
|
|
97
|
+
* `archive_feature()` - Archives a specified Feature from a FeatureStore.
|
|
98
|
+
* `archive_feature_group()` - Archives a specified FeatureGroup from a FeatureStore. Method archives underlying Feature, Entity, DataSource also.
|
|
99
|
+
* `delete_data_source()` - Deletes an archived DataSource.
|
|
100
|
+
* `delete_entity()` - Deletes an archived Entity.
|
|
101
|
+
* `delete_feature()` - Deletes an archived Feature.
|
|
102
|
+
* `delete_feature_group()` - Deletes an archived FeatureGroup.
|
|
103
|
+
* `get_data_source()` - Get the DataSources associated with FeatureStore.
|
|
104
|
+
* `get_dataset()` - Get the teradataml DataFrame based on Features, Entities and DataSource from FeatureGroup.
|
|
105
|
+
* `get_entity()` - Get the Entity associated with FeatureStore.
|
|
106
|
+
* `get_feature()` - Get the Feature associated with FeatureStore.
|
|
107
|
+
* `get_feature_group()` - Get the FeatureGroup associated with FeatureStore.
|
|
108
|
+
* `list_data_sources()` - List DataSources.
|
|
109
|
+
* `list_entities()` - List Entities.
|
|
110
|
+
* `list_feature_groups()` - List FeatureGroups.
|
|
111
|
+
* `list_features()` - List Features.
|
|
112
|
+
* `list_repos()` - List available repos which are configured for FeatureStore.
|
|
113
|
+
* `repair()` - Repairs the underlying FeatureStore schema on database.
|
|
114
|
+
* `set_features_active()` - Marks the Features as active.
|
|
115
|
+
* `set_features_inactive()` - Marks the Features as inactive.
|
|
116
|
+
* `setup()` - Setup the FeatureStore for a repo.
|
|
117
|
+
* Property
|
|
118
|
+
* `repo` - Property for FeatureStore repo.
|
|
119
|
+
* `grant` - Property to Grant access on FeatureStore to user.
|
|
120
|
+
* `revoke` - Property to Revoke access on FeatureStore from user.
|
|
121
|
+
|
|
122
|
+
* ###### teradataml: Table Operator Functions
|
|
123
|
+
* `Image2Matrix()` - Converts an image into a matrix.
|
|
124
|
+
|
|
125
|
+
* ###### teradataml: SQLE Engine Analytic Functions
|
|
126
|
+
* New Analytics Database Analytic Functions:
|
|
127
|
+
* `CFilter()`
|
|
128
|
+
* `NaiveBayes()`
|
|
129
|
+
* `TDNaiveBayesPredict()`
|
|
130
|
+
* `Shap()`
|
|
131
|
+
* `SMOTE()`
|
|
132
|
+
|
|
133
|
+
* ###### teradataml: Unbounded Array Framework (UAF) Functions
|
|
134
|
+
* New Unbounded Array Framework(UAF) Functions:
|
|
135
|
+
* `CopyArt()`
|
|
136
|
+
|
|
137
|
+
* ###### General functions
|
|
138
|
+
* Vantage File Management Functions
|
|
139
|
+
* `list_files()` - List the installed files in Database.
|
|
140
|
+
|
|
141
|
+
* ###### OpensourceML: LightGBM
|
|
142
|
+
* teradataml adds support for lightGBM package through `OpensourceML` (`OpenML`) feature.
|
|
143
|
+
The following functionality is added in the current release:
|
|
144
|
+
* `td_lightgbm` - Interface object to run lightgbm functions and classes through Teradata Vantage.
|
|
145
|
+
Example usage below:
|
|
146
|
+
```
|
|
147
|
+
from teradataml import td_lightgbm, DataFrame
|
|
148
|
+
|
|
149
|
+
df_train = DataFrame("multi_model_classification")
|
|
150
|
+
|
|
151
|
+
feature_columns = ["col1", "col2", "col3", "col4"]
|
|
152
|
+
label_columns = ["label"]
|
|
153
|
+
part_columns = ["partition_column_1", "partition_column_2"]
|
|
154
|
+
|
|
155
|
+
df_x = df_train.select(feature_columns)
|
|
156
|
+
df_y = df_train.select(label_columns)
|
|
157
|
+
|
|
158
|
+
# Dataset creation.
|
|
159
|
+
# Single model case.
|
|
160
|
+
obj_s = td_lightgbm.Dataset(df_x, df_y, silent=True, free_raw_data=False)
|
|
161
|
+
|
|
162
|
+
# Multi model case.
|
|
163
|
+
obj_m = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
|
|
164
|
+
obj_m_v = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
|
|
165
|
+
|
|
166
|
+
## Model training.
|
|
167
|
+
# Single model case.
|
|
168
|
+
opt = td_lightgbm.train(params={}, train_set = obj_s, num_boost_round=30)
|
|
169
|
+
|
|
170
|
+
opt.predict(data=df_x, num_iteration=20, pred_contrib=True)
|
|
171
|
+
|
|
172
|
+
# Multi model case.
|
|
173
|
+
opt = td_lightgbm.train(params={}, train_set = obj_m, num_boost_round=30,
|
|
174
|
+
callbacks=[td_lightgbm.record_evaluation(rec)],
|
|
175
|
+
valid_sets=[obj_m_v, obj_m_v])
|
|
176
|
+
|
|
177
|
+
# Passing `label` argument to get it returned in output DataFrame.
|
|
178
|
+
opt.predict(data=df_x, label=df_y, num_iteration=20)
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
* Added support for accessing scikit-learn APIs using exposed interface object `td_lightgbm`.
|
|
182
|
+
|
|
183
|
+
Refer Teradata Python Package User Guide for more details of this feature, arguments, usage, examples and supportability in Vantage.
|
|
184
|
+
|
|
185
|
+
* ###### teradataml: Functions
|
|
186
|
+
* `register()` - Registers a user defined function (UDF).
|
|
187
|
+
* `call_udf()` - Calls a registered user defined function (UDF) and returns ColumnExpression.
|
|
188
|
+
* `list_udfs()` - List all the UDFs registered using 'register()' function.
|
|
189
|
+
* `deregister()` - Deregisters a user defined function (UDF).
|
|
190
|
+
|
|
191
|
+
* ###### teradataml: Options
|
|
192
|
+
* Configuration Options
|
|
193
|
+
* `table_operator` - Specifies the name of table operator.
|
|
194
|
+
|
|
195
|
+
* ##### Updates
|
|
196
|
+
* ###### General functions
|
|
197
|
+
* `set_auth_token()` - Added `base_url` parameter which accepts the CCP url.
|
|
198
|
+
'ues_url' will be deprecated in the future and users
|
|
199
|
+
will need to specify 'base_url' instead.
|
|
200
|
+
|
|
201
|
+
* ###### teradataml: DataFrame function
|
|
202
|
+
* `join()`
|
|
203
|
+
* Now supports compound ColumnExpression having more than one binary operator in `on` argument.
|
|
204
|
+
* Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
|
|
205
|
+
* self-join now expects aliased DataFrame in `other` argument.
|
|
206
|
+
|
|
207
|
+
* ###### teradataml: GeoDataFrame function
|
|
208
|
+
* `join()`
|
|
209
|
+
* Now supports compound ColumnExpression having more than one binary operator in `on` argument.
|
|
210
|
+
* Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
|
|
211
|
+
* self-join now expects aliased DataFrame in `other` argument.
|
|
212
|
+
|
|
213
|
+
* ###### teradataml: Unbounded Array Framework (UAF) Functions
|
|
214
|
+
* `SAX()` - Default value added for `window_size` and `output_frequency`.
|
|
215
|
+
* `DickeyFuller()`
|
|
216
|
+
* Supports TDAnalyticResult as input.
|
|
217
|
+
* Default value added for `max_lags`.
|
|
218
|
+
* Removed parameter `drift_trend_formula`.
|
|
219
|
+
* Updated permitted values for `algorithm`.
|
|
220
|
+
|
|
221
|
+
* ##### teradataml: AutoML
|
|
222
|
+
* `AutoML`, `AutoRegressor` and `AutoClassifier`
|
|
223
|
+
* Now supports DECIMAL datatype as input.
|
|
224
|
+
|
|
225
|
+
* ##### teradataml: SQLE Engine Analytic Functions
|
|
226
|
+
* `TextParser()`
|
|
227
|
+
* Argument name `covert_to_lowercase` changed to `convert_to_lowercase`.
|
|
228
|
+
|
|
229
|
+
* ##### Bug Fixes
|
|
230
|
+
* `db_list_tables()` now returns correct results when '%' is used.
|
|
231
|
+
|
|
50
232
|
#### teradataml 20.00.00.02
|
|
51
233
|
|
|
52
234
|
* teradataml will no longer be supported with SQLAlchemy < 2.0.
|
|
@@ -115,6 +297,10 @@ Copyright 2024, Teradata. All Rights Reserved.
|
|
|
115
297
|
* `ues_url`
|
|
116
298
|
* `auth_token`
|
|
117
299
|
|
|
300
|
+
* #### teradataml DataFrame
|
|
301
|
+
* `to_pandas()` - Function returns the pandas dataframe with Decimal column types as float instead of object.
|
|
302
|
+
If the user wants the datatype to be object, set argument `coerce_float` to False.
|
|
303
|
+
|
|
118
304
|
* ###### Database Utility
|
|
119
305
|
* `list_td_reserved_keywords()` - Accepts a list of strings as argument.
|
|
120
306
|
|
|
@@ -133,7 +319,7 @@ Copyright 2024, Teradata. All Rights Reserved.
|
|
|
133
319
|
* ##### Bug Fixes
|
|
134
320
|
* KNN `predict()` function can now predict on test data which does not contain target column.
|
|
135
321
|
* Metrics functions are supported on the Lake system.
|
|
136
|
-
* The following OpensourceML functions from different sklearn modules are fixed.
|
|
322
|
+
* The following OpensourceML functions from different sklearn modules in single model case are fixed.
|
|
137
323
|
* `sklearn.ensemble`:
|
|
138
324
|
* ExtraTreesClassifier - `apply()`
|
|
139
325
|
* ExtraTreesRegressor - `apply()`
|
|
@@ -146,12 +332,21 @@ Copyright 2024, Teradata. All Rights Reserved.
|
|
|
146
332
|
* Nystroem - `transform()`, `fit_transform()`
|
|
147
333
|
* PolynomialCountSketch - `transform()`, `fit_transform()`
|
|
148
334
|
* RBFSampler - `transform()`, `fit_transform()`
|
|
149
|
-
* `sklearn.
|
|
335
|
+
* `sklearn.neighbors`:
|
|
150
336
|
* KNeighborsTransformer - `transform()`, `fit_transform()`
|
|
151
337
|
* RadiusNeighborsTransformer - `transform()`, `fit_transform()`
|
|
152
338
|
* `sklearn.preprocessing`:
|
|
153
339
|
* KernelCenterer - `transform()`
|
|
154
340
|
* OneHotEncoder - `transform()`, `inverse_transform()`
|
|
341
|
+
* The following OpensourceML functions from different sklearn modules in multi model case are fixed.
|
|
342
|
+
* `sklearn.feature_selection`:
|
|
343
|
+
* SelectFpr - `transform()`, `fit_transform()`, `inverse_transform()`
|
|
344
|
+
* SelectFdr - `transform()`, `fit_transform()`, `inverse_transform()`
|
|
345
|
+
* SelectFromModel - `transform()`, `fit_transform()`, `inverse_transform()`
|
|
346
|
+
* SelectFwe - `transform()`, `fit_transform()`, `inverse_transform()`
|
|
347
|
+
* RFECV - `transform()`, `fit_transform()`, `inverse_transform()`
|
|
348
|
+
* `sklearn.clustering`:
|
|
349
|
+
* Birch - `transform()`, `fit_transform()`
|
|
155
350
|
* OpensourceML returns teradataml objects for model attributes and functions instead of sklearn
|
|
156
351
|
objects so that the user can perform further operations like `score()`, `predict()` etc on top
|
|
157
352
|
of the returned objects.
|