tdfs4ds 0.2.4.38__py3-none-any.whl → 0.2.4.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/feature_store/feature_store_management.py +36 -63
- tdfs4ds/utils/query_management.py +18 -40
- {tdfs4ds-0.2.4.38.dist-info → tdfs4ds-0.2.4.39.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.38.dist-info → tdfs4ds-0.2.4.39.dist-info}/RECORD +7 -7
- {tdfs4ds-0.2.4.38.dist-info → tdfs4ds-0.2.4.39.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.38.dist-info → tdfs4ds-0.2.4.39.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
tdfs4ds/feature_store/feature_store_management.py
CHANGED
|
@@ -130,57 +130,27 @@ def feature_store_catalog_creation(if_exists='replace', comment='this table is a
|
|
|
130
130
|
return tdfs4ds.FEATURE_CATALOG_NAME
|
|
131
131
|
|
|
132
132
|
|
|
133
|
-
def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index
|
|
133
|
+
def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index=None, partitioning=''):
|
|
134
134
|
"""
|
|
135
135
|
Creates a table and a corresponding view for feature storage in a Teradata database schema, based on specified entity ID and feature type.
|
|
136
|
-
|
|
137
|
-
This function automates the creation of a table and view tailored for storing features in a structured manner. It leverages provided entity identifiers and feature types to generate table and view names dynamically, integrating with an existing feature catalog for consistency and reference. The table and view are created with considerations for primary indexing and optional partitioning strategies to optimize data management and access.
|
|
138
|
-
|
|
139
|
-
Parameters:
|
|
140
|
-
- entity_id (dict): Maps column names to their respective data types, defining the structure of the entity identifier(s).
|
|
141
|
-
- feature_type (str): Specifies the data type of the feature (e.g., 'FLOAT', 'BIGINT', 'VARCHAR_LATIN', 'VARCHAR_UNICODE').
|
|
142
|
-
- if_exists (str, optional): Determines the action if the table already exists. Options include:
|
|
143
|
-
'fail' (default), which raises an error; and 'replace', which drops the existing table and creates a new one.
|
|
144
|
-
- primary_index (list, optional): Specifies the columns to be used as the primary index for the table. Enhances data retrieval performance.
|
|
145
|
-
- partitioning (str, optional): SQL clause to define table partitioning. Aids in managing large datasets efficiently.
|
|
146
|
-
|
|
147
|
-
Returns:
|
|
148
|
-
str: The name of the newly created feature store table.
|
|
149
|
-
|
|
150
|
-
Note:
|
|
151
|
-
- Utilizes default schema and feature catalog names as defined in the tdfs4ds module.
|
|
152
|
-
- The primary index typically includes the entity ID, feature ID, and feature version for optimal data organization.
|
|
153
|
-
- A secondary index on the feature ID facilitates efficient querying.
|
|
154
|
-
- Corresponding views offer a snapshot of the current valid-time features, simplifying temporal queries.
|
|
155
|
-
- Existing tables are handled based on the 'if_exists' parameter, with support for replacing or retaining the tables.
|
|
156
|
-
- Assumes necessary database access and permissions are available for table and view creation.
|
|
157
|
-
|
|
158
|
-
Example Usage:
|
|
159
|
-
>>> entity_id_dict = {'customer_id': 'INTEGER'}
|
|
160
|
-
>>> table_name = feature_store_table_creation(entity_id_dict, 'FLOAT')
|
|
161
|
-
>>> print(f"Feature store table {table_name} created successfully.")
|
|
162
136
|
"""
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() ==table_name.lower()]) > 0:
|
|
167
|
-
if tdfs4ds.DISPLAY_LOGS:
|
|
168
|
-
print(f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
|
|
137
|
+
table_name, view_name = get_feature_store_table_name(entity_id, feature_type, primary_index=primary_index, partitioning=partitioning)
|
|
138
|
+
if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() == table_name.lower()]) > 0:
|
|
139
|
+
logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
|
|
169
140
|
return table_name
|
|
170
141
|
else:
|
|
171
|
-
|
|
172
|
-
print(f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')
|
|
142
|
+
logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')
|
|
173
143
|
|
|
174
144
|
query_feature_value = {
|
|
175
145
|
'FLOAT': 'FEATURE_VALUE FLOAT',
|
|
176
146
|
'BIGINT': 'FEATURE_VALUE BIGINT',
|
|
177
147
|
'VARCHAR_LATIN': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET LATIN',
|
|
178
148
|
'VARCHAR_UNICODE': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET UNICODE',
|
|
179
|
-
'TIMESTAMP0'
|
|
180
|
-
'TIMESTAMP0TZ'
|
|
181
|
-
'PERIODTS0'
|
|
149
|
+
'TIMESTAMP0': 'FEATURE_VALUE TIMESTAMP(0)',
|
|
150
|
+
'TIMESTAMP0TZ': 'FEATURE_VALUE TIMESTAMP(0) WITH TIME ZONE',
|
|
151
|
+
'PERIODTS0': 'FEATURE_VALUE PERIOD(TIMESTAMP(0))',
|
|
182
152
|
'PERIODTS0TZ': 'FEATURE_VALUE PERIOD(TIMESTAMP(0) WITH TIME ZONE)',
|
|
183
|
-
'DECIMAL'
|
|
153
|
+
'DECIMAL': 'FEATURE_VALUE DECIMAL(38,19)'
|
|
184
154
|
}
|
|
185
155
|
|
|
186
156
|
# Construct the column definitions for the table based on the entity ID
|
|
@@ -197,12 +167,14 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
|
|
|
197
167
|
# SQL query to create the feature store table
|
|
198
168
|
if feature_type.lower() == 'ref':
|
|
199
169
|
partitioning = partitioning.replace('"', "'")
|
|
200
|
-
partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),','')
|
|
170
|
+
partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),', '')
|
|
201
171
|
partitioning = partitioning.replace(
|
|
202
172
|
f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH})',
|
|
203
|
-
''
|
|
173
|
+
''
|
|
174
|
+
)
|
|
204
175
|
substr = extract_partition_content(partitioning.upper())
|
|
205
|
-
if len(substr)==0:
|
|
176
|
+
if len(substr) == 0:
|
|
177
|
+
partitioning = ''
|
|
206
178
|
query = f"""
|
|
207
179
|
CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
|
|
208
180
|
FALLBACK,
|
|
@@ -218,7 +190,7 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
|
|
|
218
190
|
{partitioning};
|
|
219
191
|
"""
|
|
220
192
|
else:
|
|
221
|
-
partitioning = partitioning.replace('"',"'")
|
|
193
|
+
partitioning = partitioning.replace('"', "'")
|
|
222
194
|
query = f"""
|
|
223
195
|
CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
|
|
224
196
|
FALLBACK,
|
|
@@ -267,39 +239,40 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
|
|
|
267
239
|
|
|
268
240
|
try:
|
|
269
241
|
# Attempt to execute the create table query
|
|
270
|
-
execute_query(query)
|
|
271
|
-
execute_query(query3)
|
|
242
|
+
execute_query(query, raise_error=True)
|
|
243
|
+
execute_query(query3, raise_error=True)
|
|
272
244
|
if tdml.display.print_sqlmr_query:
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
#execute_query(query2)
|
|
245
|
+
logger_safe('info', query)
|
|
246
|
+
logger_safe('info', query3)
|
|
247
|
+
logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been created')
|
|
248
|
+
# execute_query(query2)
|
|
277
249
|
except Exception as e:
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
if
|
|
281
|
-
execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}')
|
|
282
|
-
|
|
250
|
+
msg = str(e).split('\n')[0]
|
|
251
|
+
logger_safe('error', msg)
|
|
252
|
+
if msg.endswith('already exists.') and (if_exists == 'replace'):
|
|
253
|
+
execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}', raise_error=True)
|
|
254
|
+
logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been dropped')
|
|
283
255
|
try:
|
|
284
256
|
# Attempt to recreate the table after dropping it
|
|
285
|
-
execute_query(query)
|
|
286
|
-
|
|
257
|
+
execute_query(query, raise_error=True)
|
|
258
|
+
logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been re-created')
|
|
287
259
|
if tdml.display.print_sqlmr_query:
|
|
288
|
-
|
|
289
|
-
except Exception as
|
|
290
|
-
|
|
260
|
+
logger_safe('info', query)
|
|
261
|
+
except Exception as e2:
|
|
262
|
+
logger_safe('error', str(e2).split('\n')[0])
|
|
291
263
|
|
|
292
264
|
try:
|
|
293
265
|
# Attempt to create the view
|
|
294
|
-
execute_query(query_view)
|
|
266
|
+
execute_query(query_view, raise_error=True)
|
|
295
267
|
if tdml.display.print_sqlmr_query:
|
|
296
|
-
|
|
297
|
-
|
|
268
|
+
logger_safe('info', query_view)
|
|
269
|
+
logger_safe('info', f'VIEW {tdfs4ds.SCHEMA}.{view_name} has been created')
|
|
298
270
|
except Exception as e:
|
|
299
|
-
|
|
271
|
+
logger_safe('error', str(e).split('\n')[0])
|
|
300
272
|
|
|
301
273
|
return table_name
|
|
302
274
|
|
|
275
|
+
|
|
303
276
|
def register_features(entity_id, feature_names_types, primary_index = None, partitioning = ''):
|
|
304
277
|
"""
|
|
305
278
|
Orchestrates the registration or update of feature definitions in a Teradata database's feature catalog.
|
|
tdfs4ds/utils/query_management.py
CHANGED
|
@@ -84,71 +84,49 @@ def execute_query_wrapper(f):
|
|
|
84
84
|
return wrapped_f
|
|
85
85
|
|
|
86
86
|
|
|
87
|
-
def execute_query(query):
|
|
87
|
+
def execute_query(query, raise_error=False):
|
|
88
88
|
"""
|
|
89
89
|
Execute a SQL query or a list of queries using the tdml module.
|
|
90
90
|
|
|
91
|
-
This function checks the version of the tdml module and executes the query or queries accordingly.
|
|
92
|
-
For versions greater than 17.20.00.03, it uses `tdml.execute_sql`; otherwise, it uses `tdml.get_context().execute`.
|
|
93
|
-
|
|
94
91
|
Args:
|
|
95
92
|
query (str or list): A single SQL query string or a list of SQL query strings.
|
|
93
|
+
raise_error (bool): If True, re-raise exceptions after printing them. Default is False.
|
|
96
94
|
|
|
97
95
|
Returns:
|
|
98
96
|
The result of the SQL execution if a single query is passed. None if a list of queries is passed or an exception occurs.
|
|
97
|
+
"""
|
|
98
|
+
def handle_exception(e, q):
|
|
99
|
+
# Always print error
|
|
100
|
+
print(str(e).split('\n')[0])
|
|
101
|
+
print(q)
|
|
99
102
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
To execute a list of SQL queries:
|
|
105
|
-
>>> execute_query(["UPDATE table1 SET column1 = 42", "DELETE FROM table2 WHERE condition"])
|
|
106
|
-
|
|
107
|
-
Note:
|
|
108
|
-
- If a single query is passed, the function returns the result of the SQL execution.
|
|
109
|
-
- If a list of queries is passed, the function executes each query and returns None.
|
|
110
|
-
- If an exception occurs during execution, the error message and the problematic query are printed,
|
|
111
|
-
and the function returns None.
|
|
103
|
+
# Raise exception only if requested
|
|
104
|
+
if raise_error:
|
|
105
|
+
raise e
|
|
112
106
|
|
|
113
|
-
"""
|
|
114
|
-
# Check if the version of tdml is greater than the specified base version
|
|
115
107
|
if is_version_greater_than(tdml.__version__, base_version="17.20.00.03"):
|
|
116
|
-
|
|
117
|
-
if type(query) == list:
|
|
108
|
+
if isinstance(query, list):
|
|
118
109
|
for q in query:
|
|
119
110
|
try:
|
|
120
|
-
tdml.execute_sql(q)
|
|
111
|
+
tdml.execute_sql(q)
|
|
121
112
|
except Exception as e:
|
|
122
|
-
|
|
123
|
-
print(str(e).split('\n')[0])
|
|
124
|
-
print(q)
|
|
113
|
+
handle_exception(e, q)
|
|
125
114
|
else:
|
|
126
|
-
# If query is not a list, execute it and return the result
|
|
127
115
|
try:
|
|
128
116
|
return tdml.execute_sql(query)
|
|
129
117
|
except Exception as e:
|
|
130
|
-
|
|
131
|
-
print(str(e).split('\n')[0])
|
|
132
|
-
print(query)
|
|
118
|
+
handle_exception(e, query)
|
|
133
119
|
else:
|
|
134
|
-
|
|
135
|
-
if type(query) == list:
|
|
120
|
+
if isinstance(query, list):
|
|
136
121
|
for q in query:
|
|
137
122
|
try:
|
|
138
|
-
# Use the older execution method for the query
|
|
139
123
|
tdml.get_context().execute(q)
|
|
140
124
|
except Exception as e:
|
|
141
|
-
|
|
142
|
-
print(str(e).split('\n')[0])
|
|
143
|
-
print(q)
|
|
125
|
+
handle_exception(e, q)
|
|
144
126
|
else:
|
|
145
127
|
try:
|
|
146
|
-
# Execute the single query using the older method and return the result
|
|
147
128
|
return tdml.get_context().execute(query)
|
|
148
129
|
except Exception as e:
|
|
149
|
-
|
|
150
|
-
print(str(e).split('\n')[0])
|
|
151
|
-
print(query)
|
|
130
|
+
handle_exception(e, query)
|
|
152
131
|
|
|
153
|
-
|
|
154
|
-
return
|
|
132
|
+
return None
|
|
{tdfs4ds-0.2.4.38.dist-info → tdfs4ds-0.2.4.39.dist-info}/RECORD
CHANGED
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=u0hoIQLWP7mLjqLJdGuyVtoxfKl-4mps8bVc5_c3EnI,60855
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -19,7 +19,7 @@ tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaU
|
|
|
19
19
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
20
20
|
tdfs4ds/feature_store/feature_data_processing.py,sha256=gjwypiTfwTyGyrP20v35Vu2uGIrCY80OBBeMVBsdjuk,45020
|
|
21
21
|
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=51c6ZNlLFiBIxNPinS8ot8bjWEIb1QV2eVg69yzVF80,35381
|
|
22
|
-
tdfs4ds/feature_store/feature_store_management.py,sha256=
|
|
22
|
+
tdfs4ds/feature_store/feature_store_management.py,sha256=mtPQkdMDhcOrhj9IAaH-FEP_znK53cYtEv8zXAbsigg,52123
|
|
23
23
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
24
24
|
tdfs4ds/process_store/process_followup.py,sha256=PvLcU7meg3ljBlPfuez3qwTVqpHHhVJxYxGqjgiHE8E,7265
|
|
25
25
|
tdfs4ds/process_store/process_query_administration.py,sha256=AOufkJ6DFUpBiGm-6Q6Dq0Aovw31UGTscZ3Ya0ewS-0,7851
|
|
@@ -29,10 +29,10 @@ tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
|
29
29
|
tdfs4ds/utils/filter_management.py,sha256=5_8fYYtl8RQgbIi6L_1geNM0wJMm3t1n4QvNA5DnaQg,24760
|
|
30
30
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
31
31
|
tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,37839
|
|
32
|
-
tdfs4ds/utils/query_management.py,sha256=
|
|
32
|
+
tdfs4ds/utils/query_management.py,sha256=kWDeTdsYcbpV5Tyhh-8uLRWvXh16nIdXNIJ97w76aNU,4848
|
|
33
33
|
tdfs4ds/utils/time_management.py,sha256=asIWvK5K81NNwAGqC-9Tv4Timscxyv0vyuPFs01whu0,31461
|
|
34
34
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
36
|
-
tdfs4ds-0.2.4.
|
|
37
|
-
tdfs4ds-0.2.4.
|
|
38
|
-
tdfs4ds-0.2.4.
|
|
35
|
+
tdfs4ds-0.2.4.39.dist-info/METADATA,sha256=X-V3kUdNWlXm7SKllAbGKQ2yAeHfw1da7pK_METpB-Q,14326
|
|
36
|
+
tdfs4ds-0.2.4.39.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
37
|
+
tdfs4ds-0.2.4.39.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
38
|
+
tdfs4ds-0.2.4.39.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|