teradataml 20.0.0.5__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.
Files changed (119)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +306 -0
  3. teradataml/__init__.py +1 -1
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +162 -76
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/__init__.py +2 -0
  8. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  9. teradataml/analytics/json_parser/metadata.py +22 -4
  10. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  12. teradataml/analytics/sqle/__init__.py +3 -0
  13. teradataml/analytics/utils.py +59 -11
  14. teradataml/automl/__init__.py +2369 -464
  15. teradataml/automl/autodataprep/__init__.py +15 -0
  16. teradataml/automl/custom_json_utils.py +184 -112
  17. teradataml/automl/data_preparation.py +113 -58
  18. teradataml/automl/data_transformation.py +154 -53
  19. teradataml/automl/feature_engineering.py +113 -53
  20. teradataml/automl/feature_exploration.py +548 -25
  21. teradataml/automl/model_evaluation.py +260 -32
  22. teradataml/automl/model_training.py +399 -206
  23. teradataml/clients/auth_client.py +10 -6
  24. teradataml/clients/keycloak_client.py +165 -0
  25. teradataml/common/aed_utils.py +11 -2
  26. teradataml/common/bulk_exposed_utils.py +4 -2
  27. teradataml/common/constants.py +72 -2
  28. teradataml/common/exceptions.py +32 -0
  29. teradataml/common/garbagecollector.py +50 -21
  30. teradataml/common/messagecodes.py +73 -1
  31. teradataml/common/messages.py +27 -1
  32. teradataml/common/sqlbundle.py +25 -7
  33. teradataml/common/utils.py +210 -22
  34. teradataml/context/aed_context.py +16 -10
  35. teradataml/context/context.py +37 -9
  36. teradataml/data/Employee.csv +5 -0
  37. teradataml/data/Employee_Address.csv +4 -0
  38. teradataml/data/Employee_roles.csv +5 -0
  39. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  40. teradataml/data/byom_example.json +5 -0
  41. teradataml/data/creditcard_data.csv +284618 -0
  42. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  43. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  44. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  45. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  46. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  47. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  48. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  55. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  56. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  57. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  58. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  59. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  60. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  61. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  62. teradataml/data/load_example_data.py +29 -11
  63. teradataml/data/pattern_matching_data.csv +11 -0
  64. teradataml/data/payment_fraud_dataset.csv +10001 -0
  65. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  66. teradataml/data/teradataml_example.json +75 -1
  67. teradataml/data/url_data.csv +10 -9
  68. teradataml/dataframe/copy_to.py +715 -55
  69. teradataml/dataframe/dataframe.py +2115 -97
  70. teradataml/dataframe/dataframe_utils.py +66 -28
  71. teradataml/dataframe/functions.py +1130 -2
  72. teradataml/dataframe/setop.py +4 -1
  73. teradataml/dataframe/sql.py +710 -1039
  74. teradataml/dbutils/dbutils.py +470 -35
  75. teradataml/dbutils/filemgr.py +1 -1
  76. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  77. teradataml/hyperparameter_tuner/utils.py +4 -2
  78. teradataml/lib/aed_0_1.dll +0 -0
  79. teradataml/lib/libaed_0_1.dylib +0 -0
  80. teradataml/lib/libaed_0_1.so +0 -0
  81. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  82. teradataml/opensource/_base.py +7 -1
  83. teradataml/options/configure.py +20 -4
  84. teradataml/scriptmgmt/UserEnv.py +247 -36
  85. teradataml/scriptmgmt/lls_utils.py +140 -39
  86. teradataml/sdk/README.md +79 -0
  87. teradataml/sdk/__init__.py +4 -0
  88. teradataml/sdk/_auth_modes.py +422 -0
  89. teradataml/sdk/_func_params.py +487 -0
  90. teradataml/sdk/_json_parser.py +453 -0
  91. teradataml/sdk/_openapi_spec_constants.py +249 -0
  92. teradataml/sdk/_utils.py +236 -0
  93. teradataml/sdk/api_client.py +900 -0
  94. teradataml/sdk/constants.py +62 -0
  95. teradataml/sdk/modelops/__init__.py +98 -0
  96. teradataml/sdk/modelops/_client.py +409 -0
  97. teradataml/sdk/modelops/_constants.py +304 -0
  98. teradataml/sdk/modelops/models.py +2308 -0
  99. teradataml/sdk/spinner.py +107 -0
  100. teradataml/series/series.py +12 -7
  101. teradataml/store/feature_store/constants.py +601 -234
  102. teradataml/store/feature_store/feature_store.py +2886 -616
  103. teradataml/store/feature_store/mind_map.py +639 -0
  104. teradataml/store/feature_store/models.py +5831 -214
  105. teradataml/store/feature_store/utils.py +390 -0
  106. teradataml/table_operators/query_generator.py +4 -21
  107. teradataml/table_operators/table_operator_util.py +1 -1
  108. teradataml/table_operators/templates/dataframe_register.template +6 -2
  109. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  110. teradataml/utils/docstring.py +527 -0
  111. teradataml/utils/dtypes.py +95 -1
  112. teradataml/utils/internal_buffer.py +2 -2
  113. teradataml/utils/utils.py +41 -3
  114. teradataml/utils/validators.py +699 -18
  115. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +312 -2
  116. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +119 -87
  117. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  118. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  119. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/dataframe/dataframe_utils.py

@@ -1796,7 +1796,8 @@ class DataFrameUtils():
         fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
         sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
         col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
-        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types))
+        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types),
+                                                             datalake=df._metaexpr.datalake)
         # Call the function from_node from appropriate class either DataFrame or GeoDataFrame
         new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
         new_df._orderby = df._orderby
@@ -1855,6 +1856,8 @@ class DataFrameUtils():
 
         db_schema = UtilFuncs._extract_db_name(tab_name_first)
         db_table_name = UtilFuncs._extract_table_name(tab_name_first)
+        if dfs[0]._metaexpr.datalake:
+            return DataFrame(in_schema(db_schema, db_table_name, dfs[0]._metaexpr.datalake))
 
         if db_schema:
             return DataFrame(in_schema(db_schema, db_table_name))
@@ -1875,7 +1878,9 @@ class DataFrameUtils():
             db_schema = UtilFuncs._extract_db_name(tab_name_first)
             db_table_name = UtilFuncs._extract_table_name(tab_name_first)
 
-            if db_schema:
+            if dfs[i]._metaexpr.datalake:
+                parent_df = DataFrame(in_schema(db_schema, db_table_name, dfs[i]._metaexpr.datalake))
+            elif db_schema:
                 parent_df = DataFrame(in_schema(db_schema, db_table_name))
             else:
                 parent_df = DataFrame(db_table_name)
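The three hunks above thread the metaexpression's `datalake` attribute through metaexpression creation and parent-DataFrame reconstruction, so a DataFrame built on a datalake (OTF/Iceberg) table is rebuilt with its datalake qualifier intact. A minimal sketch of the branching introduced here, using the three-argument `in_schema` call shown in the diff; the schema, table and datalake names are hypothetical:

```python
# Minimal sketch (not teradataml source): rebuild a DataFrame on its underlying
# table while preserving the datalake qualifier, mirroring the branches above.
# "mydb", "sales" and "mylake" are hypothetical names.
from teradataml import DataFrame, in_schema

def rebuild_parent(df, db_schema="mydb", db_table_name="sales"):
    datalake = df._metaexpr.datalake      # set only for datalake/OTF-backed tables
    if datalake:
        return DataFrame(in_schema(db_schema, db_table_name, datalake))
    elif db_schema:
        return DataFrame(in_schema(db_schema, db_table_name))
    return DataFrame(db_table_name)
```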
@@ -1919,7 +1924,7 @@ class DataFrameUtils():
         # 2. Comma separated parameters enclosed in parentheses
         # 3. Comma separated parameters without parenthesis
         # 4. Remaining string
-        pattern = "([A-Z0-9_]+)(\((.*)\))?(.*)"
+        pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"
 
         m = re.match(pattern, td_type)
         td_str_type = m.group(1)
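The only change in this hunk is making the type-parsing pattern a raw string, which avoids the invalid-escape-sequence warning newer Python versions emit for "\(" inside a plain string literal; the matching behaviour is unchanged. A quick illustration of what the groups capture, with a hypothetical input string:

```python
import re

# Same pattern as the fixed line above; "VARCHAR(2000)" is just a sample input.
pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"

m = re.match(pattern, "VARCHAR(2000)")
print(m.group(1))  # 'VARCHAR' -> base Teradata type name
print(m.group(3))  # '2000'    -> comma separated parameters inside parentheses
print(m.group(4))  # ''        -> remaining string, empty here
```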
@@ -1953,7 +1958,7 @@ class DataFrameUtils():
         return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]()
 
     @staticmethod
-    def _get_datalake_table_columns_info(schema, table_name, datalake):
+    def _get_datalake_table_columns_info(schema, table_name, datalake, use_dialect=False):
         """
         Function to get column names and corresponding teradatasqlalchemy types
         of a datalake table using results of 'help table <datalake>.<db_name>.<table_name>'
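The signature change adds a `use_dialect` flag; the hunk that follows reworks the body so that, when the flag is set, column names and types come from the SQLAlchemy dialect's `get_columns()` call rather than from parsing `HELP TABLE` output. A hedged sketch of the two call forms; `_get_datalake_table_columns_info` is an internal helper, and the import path and object names below are assumptions:

```python
# Hedged sketch, not documented API; import path and names are assumptions.
from teradataml.dataframe.dataframe_utils import DataFrameUtils

# Default path: parse Teradata types out of HELP TABLE <datalake>.<schema>.<table>.
cols, types = DataFrameUtils._get_datalake_table_columns_info(
    schema="mydb", table_name="sales", datalake="mylake")

# New path: let the teradatasqlalchemy dialect describe the columns instead.
cols, types = DataFrameUtils._get_datalake_table_columns_info(
    schema="mydb", table_name="sales", datalake="mylake", use_dialect=True)
```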
@@ -1990,31 +1995,64 @@ class DataFrameUtils():
                          VARCHAR(length=2000, charset='UNICODE'),
                          INTEGER()])
         """
-        # Get the column information from the strings type.
-        prepared = preparer(td_dialect())
-        sqlbundle = SQLBundle()
-        full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
-                                          prepared.quote(schema),
-                                          prepared.quote(table_name))
-        help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
-
-        cur = execute_sql(help_table_sql)
-        td_types_col_index = -1
-        for i, col_metadata in enumerate(cur.description):
-            # Help Table returns column names and
-            # corresponding IcebergType, TeradataInternalType,
-            # TeradataType. We need to extract column index for
-            # 'TeradataType' column.
-            if col_metadata[0].lower() == 'teradatatype':
-                td_types_col_index = i
-
         col_names = []
         col_types = []
-        if td_types_col_index > -1:
-            for col_info in cur.fetchall():
-                col_names.append(col_info[0])
-                col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
+        if not use_dialect:
+            # Get the column information from the strings type.
+            prepared = preparer(td_dialect())
+            sqlbundle = SQLBundle()
+            full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
+                                              prepared.quote(schema),
+                                              prepared.quote(table_name))
+            help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
+
+            cur = execute_sql(help_table_sql)
+            td_types_col_index = -1
+
+            for i, col_metadata in enumerate(cur.description):
+                # Help Table returns column names and
+                # corresponding IcebergType, TeradataInternalType,
+                # TeradataType. We need to extract column index for
+                # 'TeradataType' column.
+                if col_metadata[0].lower() in ['teradatatype', 'Type']:
+                    td_types_col_index = i
+
+            if td_types_col_index > -1:
+                for col_info in cur.fetchall():
+                    col_names.append(col_info[0])
+                    col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
+            else:
+                raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
+                                          MessageCodes.TDMLDF_CREATE_FAIL)
         else:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
-                                      MessageCodes.TDMLDF_CREATE_FAIL)
+            new_kwarg = get_connection().dialect.__class__.__name__ + "_datalake"
+            all_col_info = get_connection().dialect.get_columns(connection=get_connection(),
+                                                                table_name=table_name,
+                                                                schema=schema,
+                                                                table_only=True,
+                                                                **{new_kwarg: datalake})
+            for col_dict in all_col_info:
+                col_names.append(col_dict.get('name', col_dict.get('Column Name')))
+                col_types.append(col_dict.get('type', col_dict.get('Type')))
+
         return col_names, col_types
+
+    @staticmethod
+    def check_otf_dataframe():
+        """Decorator for validating if DataFrame is created on OTF table or not and throw error."""
+        def decorator(method):
+            def wrapper(self, *args, **kwargs):
+                if not self._datalake:
+                    attr = getattr(type(self), method.__name__, None)
+                    caller_name = method.__name__ + '()'
+                    if isinstance(attr, property):
+                        caller_name = method.__name__
+                    raise TeradataMlException(Messages.get_message(MessageCodes.OTF_TABLE_REQUIRED,
+                                                                   caller_name),
+                                              MessageCodes.UNSUPPORTED_OPERATION)
+
+                return method(self, *args, **kwargs)
+
+            return wrapper
+
+        return decorator
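The new `check_otf_dataframe` static method is a decorator factory: wrapped around a DataFrame method or property, it raises the `OTF_TABLE_REQUIRED` error unless the DataFrame was created on an OTF (datalake) table, i.e. unless `self._datalake` is set. A minimal sketch of how such a guard could be applied; the class, method name and import path below are hypothetical:

```python
# Hedged sketch; OtfOnlyFrame and alter_partitions() are made-up names, and the
# DataFrameUtils import path is inferred from the file list above.
from teradataml.dataframe.dataframe_utils import DataFrameUtils

class OtfOnlyFrame:
    def __init__(self, datalake=None):
        self._datalake = datalake   # attribute the wrapper inspects

    @DataFrameUtils.check_otf_dataframe()
    def alter_partitions(self):
        # Reached only when self._datalake is truthy; otherwise the wrapper
        # raises TeradataMlException with the OTF_TABLE_REQUIRED message.
        return "only valid on OTF tables"

OtfOnlyFrame(datalake="mylake").alter_partitions()   # runs
OtfOnlyFrame().alter_partitions()                    # raises TeradataMlException
```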