teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +182 -13
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +8 -13
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +16 -1
- teradataml/analytics/utils.py +60 -1
- teradataml/automl/__init__.py +290 -106
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +29 -10
- teradataml/automl/data_transformation.py +11 -0
- teradataml/automl/feature_engineering.py +64 -4
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +1 -1
- teradataml/clients/auth_client.py +12 -8
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/common/constants.py +71 -26
- teradataml/common/exceptions.py +32 -0
- teradataml/common/messagecodes.py +28 -0
- teradataml/common/messages.py +13 -4
- teradataml/common/sqlbundle.py +3 -2
- teradataml/common/utils.py +345 -45
- teradataml/context/context.py +259 -93
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +1 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -1
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/url_data.csv +10 -9
- teradataml/dataframe/copy_to.py +38 -27
- teradataml/dataframe/data_transfer.py +61 -45
- teradataml/dataframe/dataframe.py +1110 -132
- teradataml/dataframe/dataframe_utils.py +73 -27
- teradataml/dataframe/functions.py +1070 -9
- teradataml/dataframe/sql.py +750 -959
- teradataml/dbutils/dbutils.py +33 -13
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/utils.py +4 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/_base.py +12 -157
- teradataml/options/configure.py +24 -9
- teradataml/scriptmgmt/UserEnv.py +317 -39
- teradataml/scriptmgmt/lls_utils.py +456 -135
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +897 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +406 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/store/__init__.py +1 -1
- teradataml/table_operators/Apply.py +16 -1
- teradataml/table_operators/Script.py +20 -1
- teradataml/table_operators/query_generator.py +4 -21
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/internal_buffer.py +22 -2
- teradataml/utils/utils.py +0 -1
- teradataml/utils/validators.py +318 -58
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
|
@@ -652,7 +652,7 @@ class DataFrameUtils():
|
|
|
652
652
|
all_operations = list(set(all_operations))
|
|
653
653
|
invalid_aggregates = []
|
|
654
654
|
for operation in all_operations:
|
|
655
|
-
if operation not in valid_aggregate_operations \
|
|
655
|
+
if operation not in valid_aggregate_operations and not operation.startswith('percentile_') \
|
|
656
656
|
and operation not in UtilFuncs._get_valid_time_series_aggregate_operations():
|
|
657
657
|
invalid_aggregates.append(operation)
|
|
658
658
|
if len(invalid_aggregates) > 0: # If any of the aggregate operations specified is not valid
|
|
@@ -735,7 +735,20 @@ class DataFrameUtils():
|
|
|
735
735
|
quoted_columns = UtilFuncs._process_for_teradata_keyword(kwargs[key_to_process])
|
|
736
736
|
kwargs[key_to_process] = quoted_columns
|
|
737
737
|
|
|
738
|
-
|
|
738
|
+
if operation.startswith('percentile_'):
|
|
739
|
+
try:
|
|
740
|
+
_operation_value = operation.split('_')
|
|
741
|
+
_floatvalue = float(_operation_value[1])
|
|
742
|
+
if _floatvalue < 0.0 or _floatvalue > 1.0 or len(_operation_value)>2:
|
|
743
|
+
raise ValueError
|
|
744
|
+
except ValueError:
|
|
745
|
+
mssg = "Invalid aggregate operation '{}' requested on TeradataML DataFrame." \
|
|
746
|
+
" Valid operation should be in format 'percentile_<floatvalue>' and <floatvalue> " \
|
|
747
|
+
"should be in range [0.0, 1.0].".format(operation)
|
|
748
|
+
raise ValueError(mssg) from None
|
|
749
|
+
func_expression = getattr(df[column], 'percentile')(percentile=_floatvalue)
|
|
750
|
+
else:
|
|
751
|
+
func_expression = getattr(df[column], operation)(describe_op=describe_op, **kwargs)
|
|
739
752
|
new_column_name = column if describe_op else "{1}_{0}".format(column, operation)
|
|
740
753
|
# column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str
|
|
741
754
|
return True, new_column_name, NUMBER() if describe_op else func_expression.type, \
|
|
@@ -1940,7 +1953,7 @@ class DataFrameUtils():
|
|
|
1940
1953
|
return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]()
|
|
1941
1954
|
|
|
1942
1955
|
@staticmethod
|
|
1943
|
-
def _get_datalake_table_columns_info(schema, table_name, datalake):
|
|
1956
|
+
def _get_datalake_table_columns_info(schema, table_name, datalake, use_dialect=False):
|
|
1944
1957
|
"""
|
|
1945
1958
|
Function to get column names and corresponding teradatasqlalchemy types
|
|
1946
1959
|
of a datalake table using results of 'help table <datalake>.<db_name>.<table_name>'
|
|
@@ -1977,31 +1990,64 @@ class DataFrameUtils():
|
|
|
1977
1990
|
VARCHAR(length=2000, charset='UNICODE'),
|
|
1978
1991
|
INTEGER()])
|
|
1979
1992
|
"""
|
|
1980
|
-
# Get the column information from the strings type.
|
|
1981
|
-
prepared = preparer(td_dialect())
|
|
1982
|
-
sqlbundle = SQLBundle()
|
|
1983
|
-
full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
|
|
1984
|
-
prepared.quote(schema),
|
|
1985
|
-
prepared.quote(table_name))
|
|
1986
|
-
help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
|
|
1987
|
-
|
|
1988
|
-
cur = execute_sql(help_table_sql)
|
|
1989
|
-
td_types_col_index = -1
|
|
1990
|
-
for i, col_metadata in enumerate(cur.description):
|
|
1991
|
-
# Help Table returns column names and
|
|
1992
|
-
# corresponding IcebergType, TeradataInternalType,
|
|
1993
|
-
# TeradataType. We need to extract column index for
|
|
1994
|
-
# 'TeradataType' column.
|
|
1995
|
-
if col_metadata[0].lower() == 'teradatatype':
|
|
1996
|
-
td_types_col_index = i
|
|
1997
|
-
|
|
1998
1993
|
col_names = []
|
|
1999
1994
|
col_types = []
|
|
2000
|
-
if
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
1995
|
+
if not use_dialect:
|
|
1996
|
+
# Get the column information from the strings type.
|
|
1997
|
+
prepared = preparer(td_dialect())
|
|
1998
|
+
sqlbundle = SQLBundle()
|
|
1999
|
+
full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
|
|
2000
|
+
prepared.quote(schema),
|
|
2001
|
+
prepared.quote(table_name))
|
|
2002
|
+
help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
|
|
2003
|
+
|
|
2004
|
+
cur = execute_sql(help_table_sql)
|
|
2005
|
+
td_types_col_index = -1
|
|
2006
|
+
|
|
2007
|
+
for i, col_metadata in enumerate(cur.description):
|
|
2008
|
+
# Help Table returns column names and
|
|
2009
|
+
# corresponding IcebergType, TeradataInternalType,
|
|
2010
|
+
# TeradataType. We need to extract column index for
|
|
2011
|
+
# 'TeradataType' column.
|
|
2012
|
+
if col_metadata[0].lower() in ['teradatatype', 'Type']:
|
|
2013
|
+
td_types_col_index = i
|
|
2014
|
+
|
|
2015
|
+
if td_types_col_index > -1:
|
|
2016
|
+
for col_info in cur.fetchall():
|
|
2017
|
+
col_names.append(col_info[0])
|
|
2018
|
+
col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
|
|
2019
|
+
else:
|
|
2020
|
+
raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
|
|
2021
|
+
MessageCodes.TDMLDF_CREATE_FAIL)
|
|
2004
2022
|
else:
|
|
2005
|
-
|
|
2006
|
-
|
|
2023
|
+
new_kwarg = get_connection().dialect.__class__.__name__ + "_datalake"
|
|
2024
|
+
all_col_info = get_connection().dialect.get_columns(connection=get_connection(),
|
|
2025
|
+
table_name=table_name,
|
|
2026
|
+
schema=schema,
|
|
2027
|
+
table_only=True,
|
|
2028
|
+
**{new_kwarg: datalake})
|
|
2029
|
+
for col_dict in all_col_info:
|
|
2030
|
+
col_names.append(col_dict.get('name', col_dict.get('Column Name')))
|
|
2031
|
+
col_types.append(col_dict.get('type', col_dict.get('Type')))
|
|
2032
|
+
|
|
2007
2033
|
return col_names, col_types
|
|
2034
|
+
|
|
2035
|
+
@staticmethod
def check_otf_dataframe():
    """
    Decorator factory for validating if DataFrame is created on OTF table
    or not and throw error.

    The returned decorator wraps a method (or a property getter) whose first
    argument is an object exposing a "_datalake" attribute. When that
    attribute is falsy, the wrapped callable is not invoked and an error is
    raised instead; otherwise the call is forwarded unchanged.

    RETURNS:
        A decorator that accepts the method to guard and returns the
        guarded wrapper. The wrapper keeps the name, docstring and other
        metadata of the method it guards.

    RAISES:
        TeradataMlException - raised by the wrapper, at call time, when
        "self._datalake" is falsy.

    EXAMPLES:
        @DataFrameUtils.check_otf_dataframe()
        def some_otf_only_method(self):
            ...
    """
    # Function-scope import keeps this block self-contained; it does not
    # rely on the module importing functools at the top of the file.
    import functools

    def decorator(method):
        # functools.wraps copies __name__, __doc__, __module__, etc. from the
        # guarded method so that help(), introspection and property
        # docstrings keep describing the real method rather than "wrapper".
        @functools.wraps(method)
        def wrapper(self, *args, **kwargs):
            if not self._datalake:
                # Look the attribute up on the class, not the instance, so a
                # property is seen as a property object instead of being
                # evaluated.
                attr = getattr(type(self), method.__name__, None)
                # Properties are reported without call parentheses; regular
                # methods are reported as "name()".
                caller_name = method.__name__ + '()'
                if isinstance(attr, property):
                    caller_name = method.__name__
                raise TeradataMlException(Messages.get_message(MessageCodes.OTF_TABLE_REQUIRED,
                                                               caller_name),
                                          MessageCodes.UNSUPPORTED_OPERATION)

            return method(self, *args, **kwargs)

        return wrapper

    return decorator
|