teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of teradataml might be problematic.

Files changed (96)
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json

@@ -0,0 +1,183 @@
+ {
+   "json_schema_major_version": "1",
+   "json_schema_minor_version": "1",
+   "json_content_version": "1",
+   "function_name": "TD_API_VertexAI",
+   "function_version": "1.0",
+   "function_type": "fastpath",
+   "function_category": "Text Analytics AI",
+   "function_alias_name": "TDAPIVertexAI",
+   "function_r_name": "aa.td_apivertexai",
+   "supports_view": false,
+   "short_description": "This function sends an API request to a machine learning model deployed on Vertex AI, using input data from a Teradata table. The model processes the data and returns predictions, which are then provided as the output of the function for users to consume.",
+   "long_description": "This function sends an API request to a machine learning model deployed on Vertex AI, using input data from a Teradata table. The model processes the data and returns predictions, which are then provided as the output of the function for users to consume.",
+   "input_tables": [
+     {
+       "isOrdered": false,
+       "partitionByOne": false,
+       "name": "InputTable",
+       "alternateNames": [],
+       "isRequired": true,
+       "rDescription": "Specifies the table containing the input data.",
+       "description": "Specifies the table containing the input data.",
+       "datatype": "TABLE_ALIAS",
+       "allowsLists": false,
+       "rName": "data",
+       "useInR": true,
+       "rOrderNum": 1
+     }
+   ],
+   "argument_clauses": [
+     {
+       "name": "Endpoint",
+       "alternateNames": [],
+       "isRequired": false,
+       "rOrderNum": 2,
+       "allowNaN": false,
+       "description": "Specifies the endpoint of the model deployed on Vertex AI.",
+       "rDescription": "Specifies the endpoint of the model deployed on Vertex AI.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "allowPadding": true,
+       "rName": "endpoint",
+       "useInR": true,
+       "rFormulaUsage": false
+     },
+     {
+       "name": "Region",
+       "alternateNames": [],
+       "isRequired": false,
+       "rOrderNum": 3,
+       "allowNaN": false,
+       "description": "Specifies the name of the Google Cloud service region.",
+       "rDescription": "Specifies the name of the Google Cloud service region.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "allowPadding": true,
+       "rName": "region",
+       "useInR": true,
+       "rFormulaUsage": false
+     },
+     {
+       "name": "AccessToken",
+       "alternateNames": [],
+       "isRequired": false,
+       "rOrderNum": 4,
+       "allowNaN": false,
+       "description": "Specifies the session token for Google Cloud.",
+       "rDescription": "Specifies the session token for Google Cloud.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "allowPadding": true,
+       "rName": "access.token",
+       "useInR": true,
+       "rFormulaUsage": false
+     },
+     {
+       "name": "Project",
+       "alternateNames": [],
+       "isRequired": false,
+       "rOrderNum": 5,
+       "allowNaN": false,
+       "description": "Specifies the name of the GCP project.",
+       "rDescription": "Specifies the name of the GCP project.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "allowPadding": true,
+       "rName": "project",
+       "useInR": true,
+       "rFormulaUsage": false
+     },
+     {
+       "name": "EnableSafety",
+       "alternateNames": [],
+       "isRequired": true,
+       "rOrderNum": 6,
+       "permittedValues": ["TRUE", "FALSE"],
+       "allowNaN": false,
+       "description": "Specifies whether the safety feature needs to be enabled.",
+       "rDescription": "Specifies whether the safety feature needs to be enabled.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "allowPadding": true,
+       "rName": "enable.safety",
+       "useInR": true,
+       "rFormulaUsage": false
+     },
+     {
+       "targetTable": [
+         "InputTable"
+       ],
+       "checkDuplicate": true,
+       "allowedTypes": [],
+       "allowedTypeGroups": [
+         "ALL"
+       ],
+       "matchLengthOfArgument": "",
+       "allowPadding": false,
+       "name": "InputColumns",
+       "alternateNames": [],
+       "isRequired": true,
+       "rDescription": "Specifies the input table columns to be sent to the model hosted on Vertex AI for inference.",
+       "description": "Specifies the input table columns to be sent to the model hosted on Vertex AI for inference.",
+       "datatype": "COLUMNS",
+       "allowsLists": true,
+       "rName": "input.columns",
+       "useInR": true,
+       "rOrderNum": 7
+     },
+     {
+       "permittedValues": ["TRUE", "FALSE"],
+       "isOutputColumn": false,
+       "name": "isDebug",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specifies whether error logging is required.",
+       "description": "Specifies whether error logging is required.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "rName": "is.debug",
+       "useInR": true,
+       "rOrderNum": 8
+     },
+     {
+       "targetTable": [
+         "InputTable"
+       ],
+       "checkDuplicate": true,
+       "allowedTypes": [],
+       "allowedTypeGroups": [
+         "ALL"
+       ],
+       "matchLengthOfArgument": "",
+       "allowPadding": false,
+       "name": "Accumulate",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specifies the input table columns to copy to the output table. By default, the function copies all input table columns to the output table.",
+       "description": "Specifies the input table columns to copy to the output table. By default, the function copies all input table columns to the output table.",
+       "datatype": "COLUMNS",
+       "allowsLists": true,
+       "rName": "accumulate",
+       "useInR": true,
+       "rOrderNum": 9
+     },
+     {
+       "name": "AUTHORIZATION",
+       "alternateNames": [],
+       "isRequired": false,
+       "rOrderNum": 10,
+       "allowNaN": false,
+       "description": "Specifies the authorization object name containing the credentials, optionally prefixed by the database name.",
+       "rDescription": "Specifies the authorization object name containing the credentials, optionally prefixed by the database name.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "allowPadding": true,
+       "rName": "authorization",
+       "useInR": true,
+       "rFormulaUsage": false
+     }
+   ]
+ }
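Since every line in the hunk above is an addition, this spec file is new in 20.0.0.7. teradataml builds its Python analytic-function wrappers from JSON specs like this one, so the arguments defined above surface as keyword arguments on a generated function. Below is a minimal usage sketch inferred from the spec alone; the top-level import, the snake_case argument spellings, and all values are illustrative assumptions, not confirmed against the released API.

# Hypothetical sketch derived from the JSON spec above; the generated
# wrapper in the released package may differ in name and signature.
from teradataml import create_context, DataFrame
from teradataml import TD_API_VertexAI   # assumed top-level export

create_context(host="<host>", username="<user>", password="<pwd>")
reviews = DataFrame("reviews")            # any table with a text column

res = TD_API_VertexAI(
    data=reviews,                         # InputTable
    endpoint="<vertex-endpoint-id>",      # Endpoint: model deployed on Vertex AI
    region="us-central1",                 # Region: Google Cloud service region
    access_token="<token>",               # AccessToken: Google Cloud session token
    project="<gcp-project>",              # Project: GCP project name
    enable_safety="TRUE",                 # EnableSafety: only required scalar argument
    input_columns=["review_text"],        # InputColumns: columns sent for inference
    accumulate=["review_id"])             # Accumulate: columns copied to the output
print(res.result)                         # predictions returned by the model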
teradataml/data/load_example_data.py

@@ -13,6 +13,7 @@ import csv
  import json
  import os
  import datetime
+ from teradataml.common.constants import TeradataReservedKeywords
  from teradataml.common.exceptions import TeradataMlException
  from teradataml.common.messages import Messages
  from teradataml.common.messagecodes import MessageCodes
@@ -189,16 +190,18 @@ def __create_table_insert_data(tablename):
      date_time_varbyte = {}
      pti_table = False
      pti_clause = ""
+     td_number_of_columns = ''
+     column_names = ''

      '''
      Create column datatype string required to create a table.
      EXAMPLE:
      id integer,model varchar(30)
      '''
-     column_count = 0
      for column in col_types_dict.keys():
          if column in ["TD_TIMECODE", "TD_SEQNO"]:
-             column_count = column_count + 1
+             td_number_of_columns += '?,'
+             column_names += f"{column},"
              continue

          if column == "<PTI_CLAUSE>":
@@ -212,11 +215,22 @@ def __create_table_insert_data(tablename):
          for column_type in ["date", "timestamp", "varbyte"]:
              if column_type in col_types_dict[column]:
                  date_time_varbyte.setdefault(column_type, []).append(column)
+         quoted_column = f'"{column}"' if column.upper() in TeradataReservedKeywords.TERADATA_RESERVED_WORDS.value else column
+         column_dtypes = "{0}{1} {2},\n".format(column_dtypes, quoted_column, col_types_dict[column])
+         if "PERIOD" in col_types_dict[column].upper() and tablename in ["Employee", "Employee_roles", "Employee_Address"]:
+             # Extract the type passed in PERIOD, e.g., PERIOD(DATE), PERIOD(TIMESTAMP).
+             if "AS TRANSACTIONTIME" in col_types_dict[column].upper():
+                 continue
+             period_type = col_types_dict[column].upper().split("PERIOD(")[1].rsplit(")", 1)[0]
+             td_number_of_columns += f'PERIOD(CAST(? AS {period_type}),CAST(? AS {period_type})),'
+         else:
+             td_number_of_columns += '?,'

-         column_dtypes = "{0}{1} {2},\n".format(column_dtypes, column, col_types_dict[column])
-         column_count = column_count + 1
-
-     td_number_of_columns = '?,' * column_count
+         # Dynamically build column_names.
+         if column != "<PTI_CLAUSE>" and "AS TRANSACTIONTIME" not in col_types_dict[column].upper():
+             column_names += f"{quoted_column},"
+
+     column_names = column_names.rstrip(',')
      # Deriving global connection using context.get_context()
      con = get_connection()
      # Get temporary database.
@@ -237,14 +251,14 @@ def __create_table_insert_data(tablename):
      UtilFuncs._create_table_using_columns(tablename, column_dtypes[:-2])

      try:
-         __insert_into_table_from_csv(tablename, td_number_of_columns[:-1], csv_file, date_time_varbyte)
+         __insert_into_table_from_csv(tablename, td_number_of_columns[:-1], csv_file, date_time_varbyte, column_names)
      except:
          # Drop the table, as we have created the same.
          UtilFuncs._drop_table(tablename)
          raise


- def __insert_into_table_from_csv(tablename, column_markers, file, date_time_varbyte_columns):
+ def __insert_into_table_from_csv(tablename, column_markers, file, date_time_varbyte_columns, column_names):
      """
      Builds and executes a prepared statement with parameter markers for a table.

@@ -268,11 +282,16 @@ def __insert_into_table_from_csv(tablename, column_markers, file, date_time_varb
              Required Argument.
              Dictionary containing date, time and varbyte columns.
              Types: Dictionary
+
+         column_names
+             Required Argument.
+             Comma-separated string of column names to be inserted into the table.
+             Types: str

          EXAMPLES:
              date_time_varbyte_columns = {'date':['orderdate']}
              preparedstmt = __insert_into_table_from_csv(
-                 'mytab', '?, ?','file.csv', date_time_varbyte_columns )
+                 'mytab', '?, ?','file.csv', date_time_varbyte_columns, column_names)

          RETURNS:
              None
@@ -281,7 +300,7 @@ def __insert_into_table_from_csv(tablename, column_markers, file, date_time_varb
          Database error if an error occurred while executing the DDL statement.

      """
-     insert_stmt = SQLBundle._build_insert_into_table_records(tablename, column_markers)
+     insert_stmt = SQLBundle._build_insert_into_table_records(tablename, column_markers, column_names)

      # Defining the formatter.
      formatter = {
@@ -322,7 +341,6 @@ def __insert_into_table_from_csv(tablename, column_markers, file, date_time_varb
                  pass

          insert_list.append(tuple(new_row))
-
      # Batch Insertion (using DBAPI's executeMany) used here to insert list of dictionaries
      execute_sql(insert_stmt, insert_list)
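Taken together, these hunks replace the old "one '?' per column" marker string with per-column construction: reserved-word column names are double-quoted, PERIOD columns expand into two CAST-wrapped markers (one each for the begin and end bound read from the CSV), TRANSACTIONTIME-derived columns are skipped, and the INSERT now names its target columns explicitly. A self-contained sketch of that marker-building logic follows; build_markers, RESERVED, and the sample schema are illustrative stand-ins, not the packaged code.

# Simplified, standalone sketch of the new marker-building logic.
RESERVED = {"YEAR", "MONTH", "TITLE"}  # stand-in for TeradataReservedKeywords

def build_markers(col_types_dict):
    markers, names = [], []
    for column, col_type in col_types_dict.items():
        upper = col_type.upper()
        if "AS TRANSACTIONTIME" in upper:
            continue  # system-maintained column: no marker, no column name
        if "PERIOD(" in upper:
            # A PERIOD value spans two CSV fields (begin, end), so it needs
            # two CAST-wrapped markers instead of a single '?'.
            period_type = upper.split("PERIOD(")[1].rsplit(")", 1)[0]
            markers.append(f"PERIOD(CAST(? AS {period_type}),CAST(? AS {period_type}))")
        else:
            markers.append("?")
        # Quote column names that collide with Teradata reserved words.
        names.append(f'"{column}"' if column.upper() in RESERVED else column)
    return ",".join(markers), ",".join(names)

markers, names = build_markers({"eid": "INTEGER", "tenure": "PERIOD(DATE)"})
print(f"INSERT INTO Employee ({names}) VALUES ({markers})")
# INSERT INTO Employee (eid,tenure)
#   VALUES (?,PERIOD(CAST(? AS DATE),CAST(? AS DATE)))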