teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml has been flagged as potentially problematic; see the package registry's advisory page for details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +196 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +79 -4
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +1 -0
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/automl/data_preparation.py +3 -2
- teradataml/automl/feature_engineering.py +15 -7
- teradataml/automl/model_training.py +39 -33
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +35 -0
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +8 -2
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +25 -3
- teradataml/common/utils.py +134 -9
- teradataml/context/context.py +20 -10
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/dataframe.py +543 -175
- teradataml/dataframe/functions.py +553 -25
- teradataml/dataframe/sql.py +184 -15
- teradataml/dbutils/dbutils.py +556 -18
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
- teradataml/options/__init__.py +7 -23
- teradataml/options/configure.py +29 -3
- teradataml/scriptmgmt/UserEnv.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +74 -21
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +33 -1
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2024 by Teradata Corporation. All rights reserved.
|
|
3
|
+
TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
|
|
4
|
+
|
|
5
|
+
Primary Owner: pradeep.garre@teradata.com
|
|
6
|
+
Secondary Owner: adithya.avvaru@teradata.com
|
|
7
|
+
|
|
8
|
+
This file implements constants required for Teradata Enterprise Feature Store.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from teradatasqlalchemy.types import *
|
|
12
|
+
from enum import Enum
|
|
13
|
+
|
|
14
|
+
# Template for the DELETE trigger created on each metadata table: every
# row deleted from {table} is copied into {table}_staging together with
# an archive timestamp (current_timestamp(6)).
# {{schema_name}} is brace-escaped so it survives this module's first
# .format() pass and remains a placeholder for a later format call.
_EFS_TRIGGER_TEMPLATE = """
CREATE TRIGGER {{schema_name}}.{table}_trg
AFTER DELETE ON {{schema_name}}.{table}
REFERENCING OLD AS DeletedRow
FOR EACH ROW
INSERT INTO {{schema_name}}.{table}_staging
VALUES ({columns},
current_timestamp(6)
)
"""
|
|
26
|
+
|
|
27
|
+
# Table for storing the features. One row per registered feature.
EFS_FEATURES_SPEC = {
    "table_name": "_efs_features",
    # Key order matters: the DELETE trigger's column list
    # (EFS_FEATURES_TRG) is built by iterating these keys in
    # insertion order.
    "columns": {
        "name": VARCHAR(200),
        "column_name": VARCHAR(200),
        "description": VARCHAR(1024),
        "tags": VARCHAR(2000),
        "data_type": VARCHAR(1024),
        "feature_type": VARCHAR(100),
        "status": VARCHAR(100),
        "creation_time": TIMESTAMP,
        "modified_time": TIMESTAMP
    },
    # NOTE(review): a plain string here, but EFS_ENTITY_SPEC uses a
    # list (["name"]) — confirm both forms are accepted downstream.
    "primary_index": "name"
}
|
|
43
|
+
|
|
44
|
+
# Staging (archive) table for features: the DELETE trigger copies rows
# removed from the features table into this table, adding the archive
# timestamp. Columns are derived from EFS_FEATURES_SPEC plus
# "archived_time" so the two specs cannot drift apart; dict unpacking
# preserves the base key order, which the trigger column list relies on.
# (The derived columns share the base spec's type instances, which are
# treated as immutable here.)
EFS_FEATURES_STAGING_SPEC = {
    "table_name": "{}_staging".format(EFS_FEATURES_SPEC["table_name"]),
    "columns": {
        **EFS_FEATURES_SPEC["columns"],
        "archived_time": TIMESTAMP
    },
    # Archive tables are append-only; no primary index.
    "primary_index": None
}
|
|
61
|
+
|
|
62
|
+
# DELETE trigger DDL that archives removed feature rows into the
# staging table; {schema_name} is left as a placeholder for later.
EFS_FEATURES_TRG = _EFS_TRIGGER_TEMPLATE.format(
    table=EFS_FEATURES_SPEC["table_name"],
    columns=", ".join(
        f"DeletedRow.{column}" for column in EFS_FEATURES_SPEC["columns"]
    )
)
|
|
66
|
+
|
|
67
|
+
# Table for storing the entities. Every dataset has column(s) that are
# unique; an entity names such a set of columns (the column names
# themselves are held in the _efs_entity_xref table).
EFS_ENTITY_SPEC = {
    "table_name": "_efs_entity",
    # Key order matters: the DELETE trigger's column list is built by
    # iterating these keys in insertion order.
    "columns": {
        "name": VARCHAR(200),
        "description": VARCHAR(200),
        "creation_time": TIMESTAMP,
        "modified_time": TIMESTAMP
    },
    # NOTE(review): a list here, but EFS_FEATURES_SPEC uses a plain
    # string — confirm both forms are accepted downstream.
    "primary_index": ["name"]
}
|
|
79
|
+
|
|
80
|
+
# Staging (archive) table for entities: the DELETE trigger copies rows
# removed from the entity table into this table, adding the archive
# timestamp. Columns are derived from EFS_ENTITY_SPEC plus
# "archived_time" so the two specs cannot drift apart; dict unpacking
# preserves the base key order, which the trigger column list relies on.
EFS_ENTITY_STAGING_SPEC = {
    "table_name": "{}_staging".format(EFS_ENTITY_SPEC["table_name"]),
    "columns": {
        **EFS_ENTITY_SPEC["columns"],
        "archived_time": TIMESTAMP
    },
    # Archive tables are append-only; no primary index.
    "primary_index": None
}
|
|
91
|
+
|
|
92
|
+
# DELETE trigger DDL that archives removed entity rows into the
# staging table; {schema_name} is left as a placeholder for later.
EFS_ENTITY_TRG = _EFS_TRIGGER_TEMPLATE.format(
    table=EFS_ENTITY_SPEC["table_name"],
    columns=", ".join(
        f"DeletedRow.{column}" for column in EFS_ENTITY_SPEC["columns"]
    )
)
|
|
96
|
+
|
|
97
|
+
# Cross-reference table mapping an entity to the dataset column(s)
# that compose it (one row per entity/column pair).
EFS_ENTITY_XREF_SPEC = {
    "table_name": "_efs_entity_xref",
    "columns": {
        "entity_name": VARCHAR(200),
        "entity_column": VARCHAR(200)
    },
    "primary_index": ["entity_name", "entity_column"],
    # Each foreign key is a tuple of (referencing columns,
    # referenced "table.column" strings, constraint name).
    "foreign_keys": [
        (
            ["entity_name"],
            ["{}.name".format(EFS_ENTITY_SPEC["table_name"])],
            "entity_xref_fk"
        )
    ]
}
|
|
112
|
+
|
|
113
|
+
# Staging (archive) table for entity cross-references: the DELETE
# trigger copies rows removed from the xref table into this table,
# adding the archive timestamp. Columns are derived from
# EFS_ENTITY_XREF_SPEC plus "archived_time" so the two specs cannot
# drift apart; dict unpacking preserves the base key order, which the
# trigger column list relies on.
EFS_ENTITY_XREF_STAGING_SPEC = {
    "table_name": "{}_staging".format(EFS_ENTITY_XREF_SPEC["table_name"]),
    "columns": {
        **EFS_ENTITY_XREF_SPEC["columns"],
        "archived_time": TIMESTAMP
    },
    # Archive tables are append-only; no primary index.
    "primary_index": None
}
|
|
122
|
+
|
|
123
|
+
# DELETE trigger DDL that archives removed entity-xref rows into the
# staging table; {schema_name} is left as a placeholder for later.
EFS_ENTITY_XREF_TRG = _EFS_TRIGGER_TEMPLATE.format(
    table=EFS_ENTITY_XREF_SPEC["table_name"],
    columns=", ".join(
        f"DeletedRow.{column}" for column in EFS_ENTITY_XREF_SPEC["columns"]
    )
)
|
|
127
|
+
|
|
128
|
+
# Table for storing the data sources. The "source" column stores
# the corresponding query text.
EFS_DATA_SOURCE_SPEC = {
    "table_name": "_efs_data_source",
    # Key order matters: the DELETE trigger's column list is built by
    # iterating these keys in insertion order.
    "columns": {
        "name": VARCHAR(200),
        "description": VARCHAR(1024),
        # presumably the name of the timestamp column within the
        # source data — TODO confirm against feature_store.py
        "timestamp_col_name": VARCHAR(50),
        "source": VARCHAR(5000),
        "creation_time": TIMESTAMP,
        "modified_time": TIMESTAMP
    },
    "primary_index": "name"
}
|
|
142
|
+
|
|
143
|
+
# Staging (archive) table for data sources: the DELETE trigger copies
# rows removed from the data source table into this table, adding the
# archive timestamp. Columns are derived from EFS_DATA_SOURCE_SPEC plus
# "archived_time" so the two specs cannot drift apart; dict unpacking
# preserves the base key order, which the trigger column list relies on.
EFS_DATA_SOURCE_STAGING_SPEC = {
    "table_name": "{}_staging".format(EFS_DATA_SOURCE_SPEC["table_name"]),
    "columns": {
        **EFS_DATA_SOURCE_SPEC["columns"],
        "archived_time": TIMESTAMP
    },
    # Archive tables are append-only; no primary index.
    "primary_index": None
}
|
|
156
|
+
|
|
157
|
+
# DELETE trigger DDL that archives removed data-source rows into the
# staging table; {schema_name} is left as a placeholder for later.
EFS_DATA_SOURCE_TRG = _EFS_TRIGGER_TEMPLATE.format(
    table=EFS_DATA_SOURCE_SPEC["table_name"],
    columns=", ".join(
        f"DeletedRow.{column}" for column in EFS_DATA_SOURCE_SPEC["columns"]
    )
)
|
|
161
|
+
|
|
162
|
+
# Table for storing the feature groups. This table holds all the required
# parameters for creating a DataFrame.
EFS_FEATURE_GROUP_SPEC = {
    "table_name": "_efs_feature_group",
    # Key order matters: the DELETE trigger's column list is built by
    # iterating these keys in insertion order.
    "columns": {
        "name": VARCHAR(200),
        "description": VARCHAR(200),
        "data_source_name": VARCHAR(200),
        "entity_name": VARCHAR(200),
        "creation_time": TIMESTAMP,
        "modified_time": TIMESTAMP
    },
    "primary_index": "name",
    # Each foreign key is a tuple of (referencing columns,
    # referenced "table.column" strings, constraint name).
    "foreign_keys": [
        (
            ["data_source_name"],
            ["{}.name".format(EFS_DATA_SOURCE_SPEC["table_name"])],
            "data_source_name_fk"
        ),
        (
            ["entity_name"],
            ["{}.name".format(EFS_ENTITY_SPEC["table_name"])],
            "entity_fk"
        )

    ]
}
|
|
189
|
+
|
|
190
|
+
# Staging (archive) table for feature groups: the DELETE trigger copies
# rows removed from the feature group table into this table, adding the
# archive timestamp. Columns are derived from EFS_FEATURE_GROUP_SPEC
# plus "archived_time" so the two specs cannot drift apart; dict
# unpacking preserves the base key order, which the trigger column list
# relies on.
EFS_FEATURE_GROUP_STAGING_SPEC = {
    "table_name": "{}_staging".format(EFS_FEATURE_GROUP_SPEC["table_name"]),
    "columns": {
        **EFS_FEATURE_GROUP_SPEC["columns"],
        "archived_time": TIMESTAMP
    },
    # Archive tables are append-only; no primary index.
    "primary_index": None
}
|
|
203
|
+
|
|
204
|
+
# DELETE trigger DDL that archives removed feature-group rows into the
# staging table; {schema_name} is left as a placeholder for later.
EFS_FEATURE_GROUP_TRG = _EFS_TRIGGER_TEMPLATE.format(
    table=EFS_FEATURE_GROUP_SPEC["table_name"],
    columns=", ".join(
        f"DeletedRow.{column}" for column in EFS_FEATURE_GROUP_SPEC["columns"]
    )
)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Table for storing the feature names and associated group names
# (one row per feature/group membership).
EFS_GROUP_FEATURES_SPEC = {
    "table_name": "_efs_group_features",
    # Key order matters: the DELETE trigger's column list is built by
    # iterating these keys in insertion order.
    "columns": {
        "feature_name": VARCHAR(200),
        "group_name": VARCHAR(200),
        "creation_time": TIMESTAMP,
        "modified_time": TIMESTAMP
    },
    "primary_index": ["feature_name", "group_name"],
    # Each foreign key is a tuple of (referencing columns,
    # referenced "table.column" strings, constraint name).
    "foreign_keys": [
        (
            ["feature_name"],
            ["{}.name".format(EFS_FEATURES_SPEC["table_name"])],
            "feature_name_fk"
        ),
        (
            ["group_name"],
            ["{}.name".format(EFS_FEATURE_GROUP_SPEC["table_name"])],
            "group_name_fk"
        )

    ]
}
|
|
234
|
+
|
|
235
|
+
# Staging (archive) table for group-feature memberships: the DELETE
# trigger copies rows removed from the group features table into this
# table, adding the archive timestamp. Columns are derived from
# EFS_GROUP_FEATURES_SPEC plus "archived_time" so the two specs cannot
# drift apart; dict unpacking preserves the base key order, which the
# trigger column list relies on.
EFS_GROUP_FEATURES_STAGING_SPEC = {
    "table_name": "{}_staging".format(EFS_GROUP_FEATURES_SPEC["table_name"]),
    "columns": {
        **EFS_GROUP_FEATURES_SPEC["columns"],
        "archived_time": TIMESTAMP
    },
    # Archive tables are append-only; no primary index.
    "primary_index": None
}
|
|
246
|
+
|
|
247
|
+
# DELETE trigger DDL that archives removed group-feature rows into the
# staging table; {schema_name} is left as a placeholder for later.
EFS_GROUP_FEATURES_TRG = _EFS_TRIGGER_TEMPLATE.format(
    table=EFS_GROUP_FEATURES_SPEC["table_name"],
    columns=", ".join(
        f"DeletedRow.{column}" for column in EFS_GROUP_FEATURES_SPEC["columns"]
    )
)
|
|
251
|
+
|
|
252
|
+
# Table to store the version of the feature store. This is very
# important: when teradataml incrementally adds feature store
# functionality, this version is the deciding factor for whether
# teradataml should automatically update the metadata or not.
# NOTE(review): unlike the other specs in this module, this one defines
# no "primary_index" key — confirm consumers handle the missing key.
EFS_VERSION_SPEC = {
    "table_name": "_efs_version",
    "columns": {
        # presumably holds version strings such as EFS_VERSION ("1.0.0")
        # — confirm against feature_store.py
        "version": VARCHAR(20),
        "creation_time": TIMESTAMP
    }
}
|
|
263
|
+
|
|
264
|
+
# Current version of the feature store metadata layout defined here.
EFS_VERSION = "1.0.0"
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# Lookup from a logical table role to the physical table name.
EFS_TABLES = {
    "feature": EFS_FEATURES_SPEC["table_name"],
    "feature_staging": EFS_FEATURES_STAGING_SPEC["table_name"],
    "feature_group": EFS_FEATURE_GROUP_SPEC["table_name"],
    "feature_group_staging": EFS_FEATURE_GROUP_STAGING_SPEC["table_name"],
    "entity": EFS_ENTITY_SPEC["table_name"],
    "entity_staging": EFS_ENTITY_STAGING_SPEC["table_name"],
    "entity_xref": EFS_ENTITY_XREF_SPEC["table_name"],
    # NOTE(review): "entity_staging_xref" breaks the "<role>_staging"
    # pattern used by every other staging key — confirm whether
    # consumers expect "entity_xref_staging" instead.
    "entity_staging_xref": EFS_ENTITY_XREF_STAGING_SPEC["table_name"],
    "data_source": EFS_DATA_SOURCE_SPEC["table_name"],
    "data_source_staging": EFS_DATA_SOURCE_STAGING_SPEC["table_name"],
    "group_features": EFS_GROUP_FEATURES_SPEC["table_name"],
    "group_features_staging": EFS_GROUP_FEATURES_STAGING_SPEC["table_name"],
    "version": EFS_VERSION_SPEC["table_name"]
}
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class FeatureStatus(Enum):
    """Status values for a feature — presumably persisted in the
    "status" column of the features table; confirm against
    feature_store.py."""
    ACTIVE = 1
    INACTIVE = 2
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class FeatureType(Enum):
    """Kind of a feature — presumably persisted in the "feature_type"
    column of the features table; confirm against feature_store.py."""
    CONTINUOUS = 1
    CATEGORICAL = 2
|