teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (126)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -12,13 +12,14 @@ This file implements util functions of data frame.
12
12
  """
13
13
 
14
14
  import numbers
15
+ import re
15
16
  import pandas as pd
16
17
  from collections import OrderedDict
17
18
 
18
19
  from teradataml.common.utils import UtilFuncs
19
20
  from teradataml.common.aed_utils import AedUtils
20
21
  from teradataml.common.constants import AEDConstants, PTITableConstants, \
21
- SQLPattern, PythonTypes
22
+ SQLPattern, PythonTypes, TeradataConstants, SQLConstants
22
23
  from teradataml.common.sqlbundle import SQLBundle
23
24
  from teradataml.common.exceptions import TeradataMlException
24
25
  from teradataml.common.messages import Messages
@@ -30,6 +31,7 @@ from teradataml.dbutils.dbutils import _execute_query_and_generate_pandas_df
30
31
 
31
32
  from teradataml.options.display import display
32
33
  from teradataml.options.configure import configure
34
+ from teradataml.utils.dtypes import _DtypesMappers
33
35
  from teradataml.utils.utils import execute_sql
34
36
 
35
37
  from teradatasqlalchemy.types import FLOAT, NUMBER, DECIMAL, PERIOD_TIMESTAMP
@@ -77,7 +79,10 @@ class DataFrameUtils():
77
79
  is_persist = True
78
80
 
79
81
  try:
80
- if node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_ML_QUERY_MULTI_OUTPUT.value or\
82
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
83
+ UtilFuncs._create_table(view_names[index], queries[index], volatile=True)
84
+
85
+ elif node_query_types[index] == AEDConstants.AED_QUERY_NODE_TYPE_ML_QUERY_MULTI_OUTPUT.value or\
81
86
  ("OUT TABLE " in queries[index] and SQLPattern.SQLMR.value.match(queries[index])) or \
82
87
  is_persist:
83
88
  # TODO:: OR condition in above needs to be removed once AED support is added.
@@ -1291,43 +1296,32 @@ class DataFrameUtils():
1291
1296
 
1292
1297
  aggregate_expr = ", ".join(select_columns)
1293
1298
  return aggregate_expr, new_column_names, new_column_types
1294
-
1299
+
1295
1300
  @staticmethod
1296
- def _invalid_describe_column(df, columns, metaexpr, groupby_column_list):
1301
+ def _validate_describe_columns(columns, metaexpr, groupby_column_list):
1297
1302
  """
1298
- Internal function to validate columns provided to describe() is correct or not,
1303
+ Internal function to validate columns provided to describe() are correct or not,
1299
1304
  when DataFrame is output of groupby and groupby_time.
1300
-
1301
1305
  PARAMETERS:
1302
- df:
1303
- Required Argument.
1304
- Specifies teradataml DataFrame we are collecting statistics for.
1305
- Types: str
1306
-
1307
1306
  columns:
1308
1307
  Optional Argument.
1309
1308
  Specifies the name(s) of columns we are collecting statistics for.
1310
1309
  Types: str ot List of strings (str)
1311
-
1312
1310
  metaexpr:
1313
1311
  Required Argument.
1314
1312
  Specifies the meta expression for the dataframe.
1315
1313
  Types: _MetaExpression
1316
-
1317
1314
  groupby_column_list:
1318
1315
  Optional Argument.
1319
1316
  Specifies the group by columns for the dataframe.
1320
1317
  Default Values: None.
1321
1318
  Types: str ot List of strings (str)
1322
-
1323
1319
  Returns:
1324
1320
  None
1325
-
1326
1321
  Raises:
1327
1322
  TeradataMLException
1328
1323
  """
1329
- invalid_columns = [_column for _column in groupby_column_list if columns is not None
1330
- and _column in columns]
1324
+ invalid_columns = [_column for _column in groupby_column_list if _column in columns]
1331
1325
  if len(invalid_columns) > 0:
1332
1326
  all_columns = [col.name for col in metaexpr.c]
1333
1327
  valid_columns = [item for item in all_columns if item not in groupby_column_list]
@@ -1849,7 +1843,10 @@ class DataFrameUtils():
1849
1843
  db_schema = UtilFuncs._extract_db_name(tab_name_first)
1850
1844
  db_table_name = UtilFuncs._extract_table_name(tab_name_first)
1851
1845
 
1852
- return DataFrame(in_schema(db_schema, db_table_name))
1846
+ if db_schema:
1847
+ return DataFrame(in_schema(db_schema, db_table_name))
1848
+
1849
+ return DataFrame(db_table_name)
1853
1850
 
1854
1851
  pids_first = None
1855
1852
  parent_df = None
@@ -1865,11 +1862,146 @@ class DataFrameUtils():
1865
1862
  db_schema = UtilFuncs._extract_db_name(tab_name_first)
1866
1863
  db_table_name = UtilFuncs._extract_table_name(tab_name_first)
1867
1864
 
1868
- parent_df = DataFrame(in_schema(db_schema, db_table_name))
1865
+ if db_schema:
1866
+ parent_df = DataFrame(in_schema(db_schema, db_table_name))
1867
+ else:
1868
+ parent_df = DataFrame(db_table_name)
1869
1869
  pids_first = pids
1870
1870
  else:
1871
1871
  if pids_first != pids:
1872
1872
  raise TeradataMlException(Messages.get_message(MessageCodes.DFS_NO_COMMON_PARENT),
1873
1873
  MessageCodes.DFS_NO_COMMON_PARENT)
1874
1874
 
1875
- return parent_df
1875
+ return parent_df
1876
+
1877
+ @staticmethod
1878
+ def _get_sqlalchemy_type_from_str(td_type):
1879
+ """
1880
+ Function to get teradatasqlalchemy type from string representation of that type.
1881
+
1882
+ PARAMETERS:
1883
+ td_type:
1884
+ Required Argument.
1885
+ Specifies string representation of teradatasqlalchemy type.
1886
+ Types: str
1887
+
1888
+ RAISES:
1889
+ ValueError
1890
+
1891
+ EXAMPLES:
1892
+ >>> dt = DataFrameUtils._get_sqlalchemy_type_from_str("DECIMAL(4,4)")
1893
+ >>> dt
1894
+ DECIMAL(precision=4, scale=4)
1895
+ >>> type(dt)
1896
+ teradatasqlalchemy.types.DECIMAL
1897
+
1898
+ >>> dt = DataFrameUtils._get_sqlalchemy_type_from_str("VARCHAR(32000) CHARACTER SET UNICODE")
1899
+ >>> dt
1900
+ VARCHAR(length=32000, charset='UNICODE')
1901
+ >>> type(dt)
1902
+ teradatasqlalchemy.types.VARCHAR
1903
+ """
1904
+ # 4 groups of pattern:
1905
+ # 1. Type name
1906
+ # 2. Comma separated parameters enclosed in parentheses
1907
+ # 3. Comma separated parameters without parenthesis
1908
+ # 4. Remaining string
1909
+ pattern = "([A-Z0-9_]+)(\((.*)\))?(.*)"
1910
+
1911
+ m = re.match(pattern, td_type)
1912
+ td_str_type = m.group(1)
1913
+ td_str_params = m.group(3)
1914
+ td_str_remain = m.group(4)
1915
+
1916
+ if m is None or td_str_type not in _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER.keys():
1917
+ raise ValueError("Invalid Teradata type: {} from datalake".format(td_type))
1918
+
1919
+ if td_str_type in ["VARCHAR", "CHAR"]:
1920
+ # If VARCHAR or CHAR, extract, length and charset from string.
1921
+ length = int(td_str_params.split(",")[0])
1922
+ charset = td_str_remain.strip().split(" ")[2]
1923
+ return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]\
1924
+ (length=length, charset=charset)
1925
+
1926
+ if td_str_type in ["BLOB"]:
1927
+ # Ignoring the charset as BLOB does not have it.
1928
+ # If BLOB, extract length from string.
1929
+ length = int(td_str_params.split(",")[0])
1930
+ return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]\
1931
+ (length=length)
1932
+
1933
+ if td_str_type in ["DECIMAL"]:
1934
+ # If DECIMAL, extract precision and scale from string.
1935
+ args = td_str_params.split(",")
1936
+ return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]\
1937
+ (precision=int(args[0]), scale=int(args[1]))
1938
+
1939
+ # TODO: Test for other data types once OTF team finalize all data types.
1940
+ return _DtypesMappers.DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER[td_str_type]()
1941
+
1942
+ @staticmethod
1943
+ def _get_datalake_table_columns_info(schema, table_name, datalake):
1944
+ """
1945
+ Function to get column names and corresponding teradatasqlalchemy types
1946
+ of a datalake table using results of 'help table <datalake>.<db_name>.<table_name>'
1947
+ SQL query.
1948
+
1949
+ PARAMETERS:
1950
+ schema:
1951
+ Required Argument.
1952
+ Specifies name of schema.
1953
+ Types: str
1954
+
1955
+ table_name:
1956
+ Required Argument.
1957
+ Specifies name of table.
1958
+ Types: str
1959
+
1960
+ datalake:
1961
+ Required Argument.
1962
+ Specifies name of datalake.
1963
+ Types: str
1964
+
1965
+ RAISES:
1966
+ TeradataMlException
1967
+
1968
+ EXAMPLES:
1969
+ >>> DataFrameUtils._get_datalake_table_columns_info(table_name = 'sales',
1970
+ ... schema='otftestdb',
1971
+ ... datalake='datalake_iceberg_glue')
1972
+ (['id', 'masters', 'gpa', 'stats', 'programming', 'admitted'],
1973
+ [INTEGER(),
1974
+ VARCHAR(length=2000, charset='UNICODE'),
1975
+ FLOAT(),
1976
+ VARCHAR(length=2000, charset='UNICODE'),
1977
+ VARCHAR(length=2000, charset='UNICODE'),
1978
+ INTEGER()])
1979
+ """
1980
+ # Get the column information from the strings type.
1981
+ prepared = preparer(td_dialect())
1982
+ sqlbundle = SQLBundle()
1983
+ full_tbl_name = '{}.{}.{}'.format(prepared.quote(datalake),
1984
+ prepared.quote(schema),
1985
+ prepared.quote(table_name))
1986
+ help_table_sql = sqlbundle._get_sql_query(SQLConstants.SQL_HELP_TABLE).format(full_tbl_name)
1987
+
1988
+ cur = execute_sql(help_table_sql)
1989
+ td_types_col_index = -1
1990
+ for i, col_metadata in enumerate(cur.description):
1991
+ # Help Table returns column names and
1992
+ # corresponding IcebergType, TeradataInternalType,
1993
+ # TeradataType. We need to extract column index for
1994
+ # 'TeradataType' column.
1995
+ if col_metadata[0].lower() == 'teradatatype':
1996
+ td_types_col_index = i
1997
+
1998
+ col_names = []
1999
+ col_types = []
2000
+ if td_types_col_index > -1:
2001
+ for col_info in cur.fetchall():
2002
+ col_names.append(col_info[0])
2003
+ col_types.append(DataFrameUtils._get_sqlalchemy_type_from_str(col_info[td_types_col_index]))
2004
+ else:
2005
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
2006
+ MessageCodes.TDMLDF_CREATE_FAIL)
2007
+ return col_names, col_types