tdfs4ds 0.2.4.25__py3-none-any.whl → 0.2.4.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +586 -564
- tdfs4ds/feature_store/feature_data_processing.py +367 -299
- tdfs4ds/feature_store/feature_query_retrieval.py +105 -52
- tdfs4ds/feature_store/feature_store_management.py +268 -285
- tdfs4ds/process_store/process_followup.py +113 -2
- tdfs4ds/process_store/process_query_administration.py +1 -1
- tdfs4ds/process_store/process_registration_management.py +67 -55
- tdfs4ds/process_store/process_store_catalog_management.py +2 -2
- tdfs4ds/utils/filter_management.py +521 -138
- tdfs4ds/utils/query_management.py +18 -40
- tdfs4ds/utils/time_management.py +547 -97
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/RECORD +15 -15
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ import pandas as pd
 import tqdm
 import inspect
 import re
+from tdfs4ds import logger_safe, logger
 
 @execute_query_wrapper
 def feature_store_catalog_view_creation():
@@ -129,57 +130,27 @@ def feature_store_catalog_creation(if_exists='replace', comment='this table is a
     return tdfs4ds.FEATURE_CATALOG_NAME
 
 
-def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index
+def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index=None, partitioning=''):
     """
     Creates a table and a corresponding view for feature storage in a Teradata database schema, based on specified entity ID and feature type.
-
-    This function automates the creation of a table and view tailored for storing features in a structured manner. It leverages provided entity identifiers and feature types to generate table and view names dynamically, integrating with an existing feature catalog for consistency and reference. The table and view are created with considerations for primary indexing and optional partitioning strategies to optimize data management and access.
-
-    Parameters:
-    - entity_id (dict): Maps column names to their respective data types, defining the structure of the entity identifier(s).
-    - feature_type (str): Specifies the data type of the feature (e.g., 'FLOAT', 'BIGINT', 'VARCHAR_LATIN', 'VARCHAR_UNICODE').
-    - if_exists (str, optional): Determines the action if the table already exists. Options include:
-      'fail' (default), which raises an error; and 'replace', which drops the existing table and creates a new one.
-    - primary_index (list, optional): Specifies the columns to be used as the primary index for the table. Enhances data retrieval performance.
-    - partitioning (str, optional): SQL clause to define table partitioning. Aids in managing large datasets efficiently.
-
-    Returns:
-    str: The name of the newly created feature store table.
-
-    Note:
-    - Utilizes default schema and feature catalog names as defined in the tdfs4ds module.
-    - The primary index typically includes the entity ID, feature ID, and feature version for optimal data organization.
-    - A secondary index on the feature ID facilitates efficient querying.
-    - Corresponding views offer a snapshot of the current valid-time features, simplifying temporal queries.
-    - Existing tables are handled based on the 'if_exists' parameter, with support for replacing or retaining the tables.
-    - Assumes necessary database access and permissions are available for table and view creation.
-
-    Example Usage:
-    >>> entity_id_dict = {'customer_id': 'INTEGER'}
-    >>> table_name = feature_store_table_creation(entity_id_dict, 'FLOAT')
-    >>> print(f"Feature store table {table_name} created successfully.")
     """
-
-
-
-    if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() ==table_name.lower()]) > 0:
-        if tdfs4ds.DISPLAY_LOGS:
-            print(f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
+    table_name, view_name = get_feature_store_table_name(entity_id, feature_type, primary_index=primary_index, partitioning=partitioning)
+    if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() == table_name.lower()]) > 0:
+        logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
         return table_name
     else:
-
-        print(f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')
+        logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')
 
     query_feature_value = {
         'FLOAT': 'FEATURE_VALUE FLOAT',
         'BIGINT': 'FEATURE_VALUE BIGINT',
         'VARCHAR_LATIN': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET LATIN',
         'VARCHAR_UNICODE': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET UNICODE',
-        'TIMESTAMP0'
-        'TIMESTAMP0TZ'
-        'PERIODTS0'
+        'TIMESTAMP0': 'FEATURE_VALUE TIMESTAMP(0)',
+        'TIMESTAMP0TZ': 'FEATURE_VALUE TIMESTAMP(0) WITH TIME ZONE',
+        'PERIODTS0': 'FEATURE_VALUE PERIOD(TIMESTAMP(0))',
         'PERIODTS0TZ': 'FEATURE_VALUE PERIOD(TIMESTAMP(0) WITH TIME ZONE)',
-        'DECIMAL'
+        'DECIMAL': 'FEATURE_VALUE DECIMAL(38,19)'
     }
 
     # Construct the column definitions for the table based on the entity ID
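A minimal usage sketch for the updated `feature_store_table_creation` signature shown above, mirroring the example that the removed docstring carried; it assumes an active teradataml connection and an already configured tdfs4ds schema (entity and feature values are illustrative):

# Illustrative only: assumes tdfs4ds.SCHEMA and the feature catalog are already set up.
from tdfs4ds.feature_store.feature_store_management import feature_store_table_creation

entity_id_dict = {'customer_id': 'INTEGER'}   # entity key column -> SQL type
table_name = feature_store_table_creation(
    entity_id_dict,
    feature_type='FLOAT',     # any key of query_feature_value, e.g. 'TIMESTAMP0' or 'DECIMAL'
    if_exists='replace',      # drop and re-create the table if it already exists
)
print(f"Feature store table {table_name} created.")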
@@ -196,12 +167,14 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
     # SQL query to create the feature store table
     if feature_type.lower() == 'ref':
         partitioning = partitioning.replace('"', "'")
-        partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),','')
+        partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),', '')
         partitioning = partitioning.replace(
             f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH})',
-            ''
+            ''
+        )
         substr = extract_partition_content(partitioning.upper())
-        if len(substr)==0:
+        if len(substr) == 0:
+            partitioning = ''
         query = f"""
         CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
         FALLBACK,
@@ -217,7 +190,7 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
         {partitioning};
         """
     else:
-        partitioning = partitioning.replace('"',"'")
+        partitioning = partitioning.replace('"', "'")
         query = f"""
         CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
         FALLBACK,
@@ -266,39 +239,40 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
 
     try:
         # Attempt to execute the create table query
-        execute_query(query)
-        execute_query(query3)
+        execute_query(query, raise_error=True)
+        execute_query(query3, raise_error=True)
         if tdml.display.print_sqlmr_query:
-
-
-
-        #execute_query(query2)
+            logger_safe('info', query)
+            logger_safe('info', query3)
+        logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been created')
+        # execute_query(query2)
     except Exception as e:
-
-
-        if
-            execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}')
-
+        msg = str(e).split('\n')[0]
+        logger_safe('error', msg)
+        if msg.endswith('already exists.') and (if_exists == 'replace'):
+            execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}', raise_error=True)
+            logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been dropped')
            try:
                # Attempt to recreate the table after dropping it
-                execute_query(query)
-
+                execute_query(query, raise_error=True)
+                logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been re-created')
                if tdml.display.print_sqlmr_query:
-
-            except Exception as
-
+                    logger_safe('info', query)
+            except Exception as e2:
+                logger_safe('error', str(e2).split('\n')[0])
 
     try:
         # Attempt to create the view
-        execute_query(query_view)
+        execute_query(query_view, raise_error=True)
         if tdml.display.print_sqlmr_query:
-
-
+            logger_safe('info', query_view)
+        logger_safe('info', f'VIEW {tdfs4ds.SCHEMA}.{view_name} has been created')
     except Exception as e:
-
+        logger_safe('error', str(e).split('\n')[0])
 
     return table_name
 
+
 def register_features(entity_id, feature_names_types, primary_index = None, partitioning = ''):
     """
     Orchestrates the registration or update of feature definitions in a Teradata database's feature catalog.
@@ -350,46 +324,47 @@ def register_features(entity_id, feature_names_types, primary_index = None, part
 
 def _register_features_merge(entity_id, feature_names_types, primary_index=None, partitioning=''):
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    Register or update feature definitions in the feature catalog, with temporal support.
+
+    This function builds (or refreshes) entries in the Teradata feature catalog from a
+    mapping of feature names to their metadata, computes the target feature store table
+    and view names, stages the metadata to a temporary table, and executes a MERGE into
+    the catalog (with optional VALIDTIME support based on `tdfs4ds.FEATURE_STORE_TIME`).
+
+    Parameters
+    ----------
+    entity_id : dict[str, Any]
+        Mapping of entity-key column names to types. Only the keys (column names) are
+        required here; values are not used by this function.
+    feature_names_types : dict[str, dict]
+        Dict of feature name -> {"type": <SQL_TYPE>, "id": <int>} describing each
+        feature’s storage type and identifier in the catalog.
+    primary_index : list[str] | None, optional
+        Primary index column(s) to use when deriving the feature store table/view names.
+        If None, defaults are inferred by `get_feature_store_table_name`.
+    partitioning : str, optional
+        Partitioning expression or comma-separated column list used by
+        `get_feature_store_table_name`.
+
+    Returns
+    -------
+    pd.DataFrame
+        A dataframe of the features that were (up)registered, including:
+        FEATURE_NAME, FEATURE_TYPE, FEATURE_ID, FEATURE_TABLE, FEATURE_VIEW,
+        ENTITY_NAME, FEATURE_DATABASE, DATA_DOMAIN.
+
+    Notes
+    -----
+    - When `tdfs4ds.FEATURE_STORE_TIME is None`, uses CURRENT VALIDTIME (non-explicit start/end).
+      Otherwise uses `VALIDTIME PERIOD ('<FEATURE_STORE_TIME>', '<END_PERIOD>')` and adds
+      the valid-time start/end when inserting.
+    - Respects `tdfs4ds.DISPLAY_LOGS` via `logger_safe`.
     """
 
-
+    # --- VALIDTIME setup -----------------------------------------------------
+    if tdfs4ds.FEATURE_STORE_TIME is None:
         validtime_statement = 'CURRENT VALIDTIME'
-        validtime_start =
+        validtime_start = "CAST(CURRENT_TIME AS TIMESTAMP(0) WITH TIME ZONE)"
     else:
         validtime_statement = f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{tdfs4ds.END_PERIOD})'"
         validtime_start = f"CAST('{tdfs4ds.FEATURE_STORE_TIME}' AS TIMESTAMP(0) WITH TIME ZONE)"
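The new docstring above pins down the expected input shapes; the sketch below is purely illustrative (feature names and ids are placeholders) and assumes the same configured tdfs4ds/teradataml context:

entity_id = {'customer_id': 'BIGINT'}            # only the keys are used to build ENTITY_NAME
feature_names_types = {
    'tx_amount': {'type': 'FLOAT',  'id': 1},    # feature name -> {"type": SQL_TYPE, "id": int}
    'tx_count':  {'type': 'BIGINT', 'id': 2},
}
df = _register_features_merge(entity_id, feature_names_types)
# df carries FEATURE_NAME, FEATURE_TYPE, FEATURE_ID, FEATURE_TABLE, FEATURE_VIEW,
# ENTITY_NAME, FEATURE_DATABASE and DATA_DOMAIN for the (up)registered features.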
@@ -399,154 +374,174 @@ def _register_features_merge(entity_id, feature_names_types, primary_index=None,
     else:
         end_period_ = tdfs4ds.END_PERIOD
 
-
-
+    # --- Input checks & early exit ------------------------------------------
+    if not feature_names_types:
+        logger_safe("info", "register_features: no new features to register")
         return
 
-    #
-
-
-    ENTITY_ID__ = ','.join([k for k in entity_id_list])
+    # --- Entity columns (ordered, stable) -----------------------------------
+    entity_cols = sorted(list(entity_id.keys()))
+    ENTITY_ID__ = ",".join(entity_cols)
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # --- Build dataframe safely (no transpose tricks) ------------------------
+    rows = []
+    for fname, meta in feature_names_types.items():
+        try:
+            rows.append({
+                "FEATURE_NAME": fname,
+                "FEATURE_TYPE": meta["type"],
+                "FEATURE_ID": meta["id"],
+            })
+        except KeyError as e:
+            logger_safe("error", "register_features: missing key %s in feature '%s' meta=%s", str(e), fname, meta)
+            raise
+
+    df = pd.DataFrame(rows, columns=["FEATURE_NAME", "FEATURE_TYPE", "FEATURE_ID"])
+
+    logger_safe(
+        "debug",
+        "register_features: features_count=%d | entity_cols=%s | primary_index=%s | partitioning=%s",
+        len(df),
+        entity_cols,
+        primary_index,
+        partitioning,
+    )
+
+    # --- Compute feature table & view names ---------------------------------
+    # Use apply to preserve original order; get_feature_store_table_name returns (table, view)
+    df["FEATURE_TABLE"] = df.apply(
+        lambda row: get_feature_store_table_name(
+            entity_id,
+            row["FEATURE_TYPE"],
+            primary_index=primary_index,
+            partitioning=partitioning
+        )[0],
+        axis=1
+    )
+    df["FEATURE_VIEW"] = df.apply(
+        lambda row: get_feature_store_table_name(
+            entity_id,
+            row["FEATURE_TYPE"],
+            primary_index=primary_index,
+            partitioning=partitioning
+        )[1],
+        axis=1
+    )
+
+    # --- Add catalog columns -------------------------------------------------
+    df["ENTITY_NAME"] = ENTITY_ID__
+    df["FEATURE_DATABASE"] = tdfs4ds.SCHEMA
+    df["DATA_DOMAIN"] = tdfs4ds.DATA_DOMAIN
+
+    # --- Stage to temp table -------------------------------------------------
+    tdml.copy_to_sql(
+        df,
+        table_name="temp",
+        schema_name=tdfs4ds.SCHEMA,
+        if_exists="replace",
+        primary_index="FEATURE_ID",
+        types={"FEATURE_ID": tdml.BIGINT},
+    )
+    logger_safe("debug", "register_features: staged %d rows to %s.temp", len(df), tdfs4ds.SCHEMA)
+
+    # --- Build MERGE statement ----------------------------------------------
+    if tdfs4ds.FEATURE_STORE_TIME is None:
+        # no explicit start/end in INSERT branch
         query_merge = f"""
         {validtime_statement}
-        MERGE INTO
+        MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
         USING (
             SELECT
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            WHEN MATCHED THEN
-
-
-
-
-
-                FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
-                --,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
-            WHEN NOT MATCHED THEN
-                INSERT
+                CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
+                , A.FEATURE_NAME
+                , A.FEATURE_TYPE
+                , A.FEATURE_TABLE
+                , A.FEATURE_DATABASE
+                , A.FEATURE_VIEW
+                , A.ENTITY_NAME
+                , A.DATA_DOMAIN
+            FROM {tdfs4ds.SCHEMA}.temp A
+            LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
+            ON A.FEATURE_NAME = B.FEATURE_NAME
+            AND A.ENTITY_NAME = B.ENTITY_NAME
+            AND A.DATA_DOMAIN = B.DATA_DOMAIN
+        ) UPDATED_FEATURES
+        ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
+        AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
+        AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
+        WHEN MATCHED THEN UPDATE SET
+            FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
+            , FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
+            , FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
+            , FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
+        WHEN NOT MATCHED THEN INSERT
             ( UPDATED_FEATURES.FEATURE_ID
-
-
-
-
-
-
-
-
-
+            , UPDATED_FEATURES.FEATURE_NAME
+            , UPDATED_FEATURES.FEATURE_TYPE
+            , UPDATED_FEATURES.FEATURE_TABLE
+            , UPDATED_FEATURES.FEATURE_DATABASE
+            , UPDATED_FEATURES.FEATURE_VIEW
+            , UPDATED_FEATURES.ENTITY_NAME
+            , UPDATED_FEATURES.DATA_DOMAIN
+            );
+        """
     else:
+        # insert with explicit valid-time start/end
         query_merge = f"""
         {validtime_statement}
-        MERGE INTO
+        MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
         USING (
             SELECT
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            WHEN MATCHED THEN
-
-
-
-
-
-                FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
-                --,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
-            WHEN NOT MATCHED THEN
-                INSERT
+                CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
+                , A.FEATURE_NAME
+                , A.FEATURE_TYPE
+                , A.FEATURE_TABLE
+                , A.FEATURE_DATABASE
+                , A.FEATURE_VIEW
+                , A.ENTITY_NAME
+                , A.DATA_DOMAIN
+            FROM {tdfs4ds.SCHEMA}.temp A
+            LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
+            ON A.FEATURE_NAME = B.FEATURE_NAME
+            AND A.ENTITY_NAME = B.ENTITY_NAME
+            AND A.DATA_DOMAIN = B.DATA_DOMAIN
+        ) UPDATED_FEATURES
+        ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
+        AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
+        AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
+        WHEN MATCHED THEN UPDATE SET
+            FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
+            , FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
+            , FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
+            , FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
+        WHEN NOT MATCHED THEN INSERT
             ( UPDATED_FEATURES.FEATURE_ID
-
-
-
-
-
-
-
-
-
-
+            , UPDATED_FEATURES.FEATURE_NAME
+            , UPDATED_FEATURES.FEATURE_TYPE
+            , UPDATED_FEATURES.FEATURE_TABLE
+            , UPDATED_FEATURES.FEATURE_DATABASE
+            , UPDATED_FEATURES.FEATURE_VIEW
+            , UPDATED_FEATURES.ENTITY_NAME
+            , UPDATED_FEATURES.DATA_DOMAIN
+            , {validtime_start}
+            , '{end_period_}'
+            );
+        """
 
-
-
-
-    # Execute the update and insert queries
+    logger_safe("debug", "register_features: merge_sql_preview=%s", " ".join(query_merge.split())[:400] + " ...")
+
+    # --- Execute MERGE -------------------------------------------------------
     execute_query(query_merge)
+    logger_safe(
+        "info",
+        "register_features: merged %d features into %s.%s",
+        len(df),
+        tdfs4ds.SCHEMA,
+        tdfs4ds.FEATURE_CATALOG_NAME,
+    )
 
     return df
+
 def _register_features_update_insert(entity_id, feature_names_types, primary_index = None, partitioning = ''):
     """
     Registers or updates feature definitions in a Teradata database's feature catalog, associating entity identifiers
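The FEATURE_ID resolution in the MERGE source query above reads as: a feature already present in the catalog view keeps its existing FEATURE_ID, while a brand-new feature takes the id staged in the temp table. A tiny Python paraphrase (hypothetical helper, not part of the package):

def resolve_feature_id(staged_id, catalog_id):
    # mirrors: CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END
    return staged_id if catalog_id is None else catalog_id

assert resolve_feature_id(10, None) == 10   # not yet catalogued -> keep the staged id
assert resolve_feature_id(10, 3) == 3       # already catalogued -> keep the existing id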
@@ -869,39 +864,6 @@ def Gettdtypes(tddf, features_columns, entity_id):
         # Increment the feature ID for the next iteration.
         feature_id += 1
 
-    # # Iterate over the data types of the columns in the DataFrame.
-    # for k, v in types.items():
-    #     # If the column name does not exist in the feature catalog table and is in the list of feature column names...
-    #     if k.upper() not in [n.upper() for n in existing_features] and k.upper() in [n.upper() for n in features_columns]:
-    #         # If the data type of the column is integer...
-    #         if 'int' in str(v.lower()):
-    #             # Add an entry to the result dictionary for the column name with its data type and new feature ID.
-    #             res[k] = {'type': 'BIGINT', 'id': feature_id}
-    #         # If the data type of the column is float...
-    #         elif 'float' in str(v.lower()):
-    #             # Add an entry to the result dictionary for the column name with its data type and new feature ID.
-    #             res[k] = {'type': 'FLOAT', 'id': feature_id}
-    #         # If the data type of the column is varchar with unicode encoding ...
-    #         elif 'unicode' in str(v.lower()):
-    #             res[k] = {'type': 'VARCHAR_UNICODE', 'id': feature_id}
-    #             # Print a message that the data type is not yet managed.
-    #             #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
-    #         # If the data type of the column is varchar with unicode encoding ...
-    #         elif 'latin' in str(v.lower()):
-    #             res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
-    #             # Print a message that the data type is not yet managed.
-    #             #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
-    #         elif 'decimal' in str(v.lower()):
-    #             res[k] = {'type': 'DECIMAL', 'id': feature_id}
-    #             # Print a message that the data type is not yet managed.
-    #             # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
-    #         else:
-    #             res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
-    #             # Print a message that the data type is not yet managed.
-    #             # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
-    #     # # Increment the feature ID for the next iteration.
-    #     feature_id += 1
-
     # Return the result dictionary.
     return res
 
@@ -979,36 +941,50 @@ def tdstone2_Gettdtypes(existing_model, entity_id, display_logs=False):
     # Return the dictionary containing feature names, types, and IDs.
     return res
 
-def delete_feature(feature_name, data_domain=None):
+def delete_feature(feature_name, entity_id, data_domain=None):
     """
-    Delete the values of a specific feature from the feature table
+    Delete the values of a specific feature for given entities from the feature table
+    within a specified data domain.
 
     This function constructs and executes two SQL queries against a Teradata database
-    to remove a feature specified by its name. The first query
-    where the feature resides, based on the feature name
-    deletes the feature from the
+    to remove a feature specified by its name and entity identifiers. The first query
+    retrieves the table name where the feature resides, based on the feature name,
+    entity, and data domain. The second query deletes the feature values from the
+    identified table.
 
     Parameters:
     - feature_name (str): The name of the feature to be removed.
-    -
-
-
-
-
+    - entity_id (str or list of str): Entity identifier(s). If a string is provided,
+      it will be converted to a single-element list. The list is always sorted
+      alphabetically before use.
+    - data_domain (str, optional): The data domain where the feature is located.
+      If not specified, the function uses the default data domain defined in
+      `tdfs4ds.DATA_DOMAIN`.
+
+    Behavior:
+    - The function checks if the `DEBUG_MODE` flag in the `tdfs4ds` module is set to True.
+      If so, it prints the generated SQL queries and the resolved table name for debugging.
+    - If the feature table cannot be resolved, the function returns without executing
+      a delete query.
 
-
+    Returns:
+    - None
 
     Note:
     - The function assumes the presence of a module `tdfs4ds` with predefined constants
-      such as `DATA_DOMAIN`, `SCHEMA`, `
+      such as `DATA_DOMAIN`, `SCHEMA`, `FEATURE_CATALOG_NAME_VIEW`, and a flag `DEBUG_MODE`.
     - It also assumes a `tdml` module or object with an `execute_sql` method capable of
       executing SQL queries against a Teradata database and fetching the results.
 
     Raises:
-    -
-
+    - Exceptions related to SQL execution or connection issues may be raised but are not
+      explicitly handled, except for printing the error message.
     """
 
+    if isinstance(entity_id, str):
+        entity_id = [entity_id]
+    entity_id = sorted(entity_id)
+
     if data_domain is None:
         data_domain = tdfs4ds.DATA_DOMAIN
 
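With the signature change above, callers now pass the entity as well; a hedged usage sketch (feature, entity, and domain names are placeholders):

delete_feature('tx_amount', 'customer_id')                    # a single entity column may be passed as a string
delete_feature('basket_value', ['store_id', 'customer_id'],   # lists are sorted before matching ENTITY_NAME
               data_domain='RETAIL')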
@@ -1016,19 +992,21 @@ def delete_feature(feature_name, data_domain=None):
     SEL FEATURE_DATABASE||'.'||FEATURE_TABLE AS TABLE_NAME
     FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
     WHERE FEATURE_NAME = '{feature_name}'
-    AND DATA_DOMAIN = '{data_domain}'
+    AND DATA_DOMAIN = '{data_domain}'
+    AND ENTITY_NAME = '{','.join([e.upper() for e in entity_id])}'"""
     if tdfs4ds.DEBUG_MODE:
         print(query0)
 
     table_name = tdml.execute_sql(query0).fetchall()
-    if len(table_name)>0:
+    if len(table_name) > 0:
         table_name = table_name[0][0]
     else:
         return
     if tdfs4ds.DEBUG_MODE:
         print('table name : ', table_name)
+
     query = f"""
-    DELETE {table_name}
+    NONSEQUENCED VALIDTIME DELETE {table_name}
     WHERE FEATURE_ID = (
         SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
         WHERE FEATURE_NAME = '{feature_name}'
@@ -1044,6 +1022,7 @@ def delete_feature(feature_name, data_domain=None):
 
     return
 
+
 def remove_feature(feature_name, entity_id, data_domain=None):
     """
     Attempts to remove a specific feature from the feature catalog and any associated data,
@@ -1060,7 +1039,9 @@ def remove_feature(feature_name, entity_id, data_domain=None):
 
     Parameters:
     - feature_name (str): The name of the feature to be removed.
-    - entity_id (list of str):
+    - entity_id (str or list of str): Entity identifier(s). If a string is provided,
+      it will be converted to a single-element list. The list is always sorted
+      alphabetically before use.
     - data_domain (str, optional): The data domain where the feature is located. If not provided,
       the function uses the default data domain from the `tdfs4ds.DATA_DOMAIN` setting.
 
@@ -1084,16 +1065,19 @@ def remove_feature(feature_name, entity_id, data_domain=None):
     - SQL execution or connection exceptions might occur but are not explicitly handled by this function.
     """
 
+    if isinstance(entity_id, str):
+        entity_id = [entity_id]
+    entity_id = sorted(entity_id)
+
     if data_domain is None:
         data_domain = tdfs4ds.DATA_DOMAIN
 
     try:
-        delete_feature(feature_name, data_domain)
+        delete_feature(feature_name, entity_id, data_domain)
     except Exception as e:
         print(str(e).split('\n')[0])
         return
 
-    entity_id.sort()
     query = f"""
     NONSEQUENCED VALIDTIME DELETE {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME}
     WHERE FEATURE_NAME = '{feature_name}'
@@ -1102,7 +1086,6 @@ def remove_feature(feature_name, entity_id, data_domain=None):
     """
     if tdfs4ds.DEBUG_MODE:
         print(query)
+
     tdml.execute_sql(query)
     return
-
-