teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (131)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +182 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +8 -13
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +60 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +12 -8
  17. teradataml/clients/keycloak_client.py +165 -0
  18. teradataml/common/constants.py +71 -26
  19. teradataml/common/exceptions.py +32 -0
  20. teradataml/common/messagecodes.py +28 -0
  21. teradataml/common/messages.py +13 -4
  22. teradataml/common/sqlbundle.py +3 -2
  23. teradataml/common/utils.py +345 -45
  24. teradataml/context/context.py +259 -93
  25. teradataml/data/apriori_example.json +22 -0
  26. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  27. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  28. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  29. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  30. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  31. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  32. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  34. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  35. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  37. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  38. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  39. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  40. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  41. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  42. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  43. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  46. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  49. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  50. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  51. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  53. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  54. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  55. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  56. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  57. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  58. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  59. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  60. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  61. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  62. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  63. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  65. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  66. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  67. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  68. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  69. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  70. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  71. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  72. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  73. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  74. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  75. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  76. teradataml/data/ner_dict.csv +8 -0
  77. teradataml/data/ner_input_eng.csv +7 -0
  78. teradataml/data/ner_rule.csv +5 -0
  79. teradataml/data/pattern_matching_data.csv +11 -0
  80. teradataml/data/pos_input.csv +40 -0
  81. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  82. teradataml/data/tdnerextractor_example.json +14 -0
  83. teradataml/data/teradataml_example.json +21 -1
  84. teradataml/data/textmorph_example.json +5 -0
  85. teradataml/data/to_num_data.csv +4 -0
  86. teradataml/data/tochar_data.csv +5 -0
  87. teradataml/data/trans_dense.csv +16 -0
  88. teradataml/data/trans_sparse.csv +55 -0
  89. teradataml/data/url_data.csv +10 -9
  90. teradataml/dataframe/copy_to.py +38 -27
  91. teradataml/dataframe/data_transfer.py +61 -45
  92. teradataml/dataframe/dataframe.py +1110 -132
  93. teradataml/dataframe/dataframe_utils.py +73 -27
  94. teradataml/dataframe/functions.py +1070 -9
  95. teradataml/dataframe/sql.py +750 -959
  96. teradataml/dbutils/dbutils.py +33 -13
  97. teradataml/dbutils/filemgr.py +14 -10
  98. teradataml/hyperparameter_tuner/utils.py +4 -2
  99. teradataml/lib/aed_0_1.dll +0 -0
  100. teradataml/opensource/_base.py +12 -157
  101. teradataml/options/configure.py +24 -9
  102. teradataml/scriptmgmt/UserEnv.py +317 -39
  103. teradataml/scriptmgmt/lls_utils.py +456 -135
  104. teradataml/sdk/README.md +79 -0
  105. teradataml/sdk/__init__.py +4 -0
  106. teradataml/sdk/_auth_modes.py +422 -0
  107. teradataml/sdk/_func_params.py +487 -0
  108. teradataml/sdk/_json_parser.py +453 -0
  109. teradataml/sdk/_openapi_spec_constants.py +249 -0
  110. teradataml/sdk/_utils.py +236 -0
  111. teradataml/sdk/api_client.py +897 -0
  112. teradataml/sdk/constants.py +62 -0
  113. teradataml/sdk/modelops/__init__.py +98 -0
  114. teradataml/sdk/modelops/_client.py +406 -0
  115. teradataml/sdk/modelops/_constants.py +304 -0
  116. teradataml/sdk/modelops/models.py +2308 -0
  117. teradataml/sdk/spinner.py +107 -0
  118. teradataml/store/__init__.py +1 -1
  119. teradataml/table_operators/Apply.py +16 -1
  120. teradataml/table_operators/Script.py +20 -1
  121. teradataml/table_operators/query_generator.py +4 -21
  122. teradataml/table_operators/table_operator_util.py +58 -9
  123. teradataml/utils/dtypes.py +4 -2
  124. teradataml/utils/internal_buffer.py +22 -2
  125. teradataml/utils/utils.py +0 -1
  126. teradataml/utils/validators.py +318 -58
  127. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
  128. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
  129. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
  130. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
  131. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
@@ -652,7 +652,7 @@ class DataFrameUtils():
652
652
  all_operations = list(set(all_operations))
653
653
  invalid_aggregates = []
654
654
  for operation in all_operations:
655
- if operation not in valid_aggregate_operations \
655
+ if operation not in valid_aggregate_operations and not operation.startswith('percentile_') \
656
656
  and operation not in UtilFuncs._get_valid_time_series_aggregate_operations():
657
657
  invalid_aggregates.append(operation)
658
658
  if len(invalid_aggregates) > 0: # If any of the aggregate operations specified is not valid
@@ -735,7 +735,20 @@ class DataFrameUtils():
735
735
  quoted_columns = UtilFuncs._process_for_teradata_keyword(kwargs[key_to_process])
736
736
  kwargs[key_to_process] = quoted_columns
737
737
 
738
- func_expression = getattr(df[column], operation)(describe_op=describe_op, **kwargs)
738
+ if operation.startswith('percentile_'):
739
+ try:
740
+ _operation_value = operation.split('_')
741
+ _floatvalue = float(_operation_value[1])
742
+ if _floatvalue < 0.0 or _floatvalue > 1.0 or len(_operation_value)>2:
743
+ raise ValueError
744
+ except ValueError:
745
+ mssg = "Invalid aggregate operation '{}' requested on TeradataML DataFrame." \
746
+ " Valid operation should be in format 'percentile_<floatvalue>' and <floatvalue> " \
747
+ "should be in range [0.0, 1.0].".format(operation)
748
+ raise ValueError(mssg) from None
749
+ func_expression = getattr(df[column], 'percentile')(percentile=_floatvalue)
750
+ else:
751
+ func_expression = getattr(df[column], operation)(describe_op=describe_op, **kwargs)
739
752
  new_column_name = column if describe_op else "{1}_{0}".format(column, operation)
740
753
  # column_supported, new_column_name, new_column_type, column_aggr_expr, invalid_column_str
741
754
  return True, new_column_name, NUMBER() if describe_op else func_expression.type, \
@@ -1940,7 +1953,7 @@ class DataFrameUtils():
1940
1953
  return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]()
1941
1954
 
1942
1955
  @staticmethod
1943
- def _get_datalake_table_columns_info(schema, table_name, datalake):
1956
+ def _get_datalake_table_columns_info(schema, table_name, datalake, use_dialect=False):
1944
1957
  """
1945
1958
  Function to get column names and corresponding teradatasqlalchemy types
1946
1959
  of a datalake table using results of 'help table <datalake>.<db_name>.<table_name>'
@@ -1977,31 +1990,64 @@ class DataFrameUtils():
1977
1990
  VARCHAR(length=2000, charset='UNICODE'),
1978
1991
  INTEGER()])
1979
1992
  """
1980
- # Get the column information from the strings type.
1981
- prepared = preparer(td_dialect())
1982
- sqlbundle = SQLBundle()
1983
- full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
1984
- prepared.quote(schema),
1985
- prepared.quote(table_name))
1986
- help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
1987
-
1988
- cur = execute_sql(help_table_sql)
1989
- td_types_col_index = -1
1990
- for i, col_metadata in enumerate(cur.description):
1991
- # Help Table returns column names and
1992
- # corresponding IcebergType, TeradataInternalType,
1993
- # TeradataType. We need to extract column index for
1994
- # 'TeradataType' column.
1995
- if col_metadata[0].lower() == 'teradatatype':
1996
- td_types_col_index = i
1997
-
1998
1993
  col_names = []
1999
1994
  col_types = []
2000
- if td_types_col_index > -1:
2001
- for col_info in cur.fetchall():
2002
- col_names.append(col_info[0])
2003
- col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
1995
+ if not use_dialect:
1996
+ # Get the column information from the strings type.
1997
+ prepared = preparer(td_dialect())
1998
+ sqlbundle = SQLBundle()
1999
+ full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
2000
+ prepared.quote(schema),
2001
+ prepared.quote(table_name))
2002
+ help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
2003
+
2004
+ cur = execute_sql(help_table_sql)
2005
+ td_types_col_index = -1
2006
+
2007
+ for i, col_metadata in enumerate(cur.description):
2008
+ # Help Table returns column names and
2009
+ # corresponding IcebergType, TeradataInternalType,
2010
+ # TeradataType. We need to extract column index for
2011
+ # 'TeradataType' column.
2012
+ if col_metadata[0].lower() in ['teradatatype', 'Type']:
2013
+ td_types_col_index = i
2014
+
2015
+ if td_types_col_index > -1:
2016
+ for col_info in cur.fetchall():
2017
+ col_names.append(col_info[0])
2018
+ col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
2019
+ else:
2020
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
2021
+ MessageCodes.TDMLDF_CREATE_FAIL)
2004
2022
  else:
2005
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
2006
- MessageCodes.TDMLDF_CREATE_FAIL)
2023
+ new_kwarg = get_connection().dialect.__class__.__name__ + "_datalake"
2024
+ all_col_info = get_connection().dialect.get_columns(connection=get_connection(),
2025
+ table_name=table_name,
2026
+ schema=schema,
2027
+ table_only=True,
2028
+ **{new_kwarg: datalake})
2029
+ for col_dict in all_col_info:
2030
+ col_names.append(col_dict.get('name', col_dict.get('Column Name')))
2031
+ col_types.append(col_dict.get('type', col_dict.get('Type')))
2032
+
2007
2033
  return col_names, col_types
2034
+
2035
+ @staticmethod
2036
+ def check_otf_dataframe():
2037
+ """Decorator for validating if DataFrame is created on OTF table or not and throw error."""
2038
+ def decorator(method):
2039
+ def wrapper(self, *args, **kwargs):
2040
+ if not self._datalake:
2041
+ attr = getattr(type(self), method.__name__, None)
2042
+ caller_name = method.__name__ + '()'
2043
+ if isinstance(attr, property):
2044
+ caller_name = method.__name__
2045
+ raise TeradataMlException(Messages.get_message(MessageCodes.OTF_TABLE_REQUIRED,
2046
+ caller_name),
2047
+ MessageCodes.UNSUPPORTED_OPERATION)
2048
+
2049
+ return method(self, *args, **kwargs)
2050
+
2051
+ return wrapper
2052
+
2053
+ return decorator