tdfs4ds 0.2.4.25__py3-none-any.whl → 0.2.4.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ import pandas as pd
  import tqdm
  import inspect
  import re
+ from tdfs4ds import logger_safe, logger

  @execute_query_wrapper
  def feature_store_catalog_view_creation():
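The new `logger_safe` import above is the thread running through this release: DISPLAY_LOGS-guarded print() calls are replaced by leveled logging. The helper's implementation is not part of this diff; purely as a hedged sketch of a wrapper with the call shape used below (a level string, then a message with optional printf-style arguments), it could look like this — not the package's actual code:

# Hypothetical sketch only; the real logger_safe ships with tdfs4ds and may
# also honor tdfs4ds.DISPLAY_LOGS, as the new docstrings state.
import logging

logger = logging.getLogger("tdfs4ds")

def logger_safe(level, msg, *args):
    """Forward to the module logger without ever raising from the logging path."""
    try:
        getattr(logger, level, logger.info)(msg, *args)
    except Exception:
        pass  # logging must not break feature-store operations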
@@ -129,57 +130,27 @@ def feature_store_catalog_creation(if_exists='replace', comment='this table is a
  return tdfs4ds.FEATURE_CATALOG_NAME


- def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index = None, partitioning = ''):
+ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index=None, partitioning=''):
  """
  Creates a table and a corresponding view for feature storage in a Teradata database schema, based on specified entity ID and feature type.
-
- This function automates the creation of a table and view tailored for storing features in a structured manner. It leverages provided entity identifiers and feature types to generate table and view names dynamically, integrating with an existing feature catalog for consistency and reference. The table and view are created with considerations for primary indexing and optional partitioning strategies to optimize data management and access.
-
- Parameters:
- - entity_id (dict): Maps column names to their respective data types, defining the structure of the entity identifier(s).
- - feature_type (str): Specifies the data type of the feature (e.g., 'FLOAT', 'BIGINT', 'VARCHAR_LATIN', 'VARCHAR_UNICODE').
- - if_exists (str, optional): Determines the action if the table already exists. Options include:
- 'fail' (default), which raises an error; and 'replace', which drops the existing table and creates a new one.
- - primary_index (list, optional): Specifies the columns to be used as the primary index for the table. Enhances data retrieval performance.
- - partitioning (str, optional): SQL clause to define table partitioning. Aids in managing large datasets efficiently.
-
- Returns:
- str: The name of the newly created feature store table.
-
- Note:
- - Utilizes default schema and feature catalog names as defined in the tdfs4ds module.
- - The primary index typically includes the entity ID, feature ID, and feature version for optimal data organization.
- - A secondary index on the feature ID facilitates efficient querying.
- - Corresponding views offer a snapshot of the current valid-time features, simplifying temporal queries.
- - Existing tables are handled based on the 'if_exists' parameter, with support for replacing or retaining the tables.
- - Assumes necessary database access and permissions are available for table and view creation.
-
- Example Usage:
- >>> entity_id_dict = {'customer_id': 'INTEGER'}
- >>> table_name = feature_store_table_creation(entity_id_dict, 'FLOAT')
- >>> print(f"Feature store table {table_name} created successfully.")
  """
-
-
- table_name, view_name = get_feature_store_table_name(entity_id, feature_type, primary_index = primary_index, partitioning = partitioning)
- if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() ==table_name.lower()]) > 0:
- if tdfs4ds.DISPLAY_LOGS:
- print(f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
+ table_name, view_name = get_feature_store_table_name(entity_id, feature_type, primary_index=primary_index, partitioning=partitioning)
+ if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() == table_name.lower()]) > 0:
+ logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
  return table_name
  else:
- if tdfs4ds.DISPLAY_LOGS:
- print(f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')
+ logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')

  query_feature_value = {
  'FLOAT': 'FEATURE_VALUE FLOAT',
  'BIGINT': 'FEATURE_VALUE BIGINT',
  'VARCHAR_LATIN': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET LATIN',
  'VARCHAR_UNICODE': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET UNICODE',
- 'TIMESTAMP0' : 'FEATURE_VALUE TIMESTAMP(0)',
- 'TIMESTAMP0TZ' : 'FEATURE_VALUE TIMESTAMP(0) WITH TIME ZONE',
- 'PERIODTS0' : 'FEATURE_VALUE PERIOD(TIMESTAMP(0))',
+ 'TIMESTAMP0': 'FEATURE_VALUE TIMESTAMP(0)',
+ 'TIMESTAMP0TZ': 'FEATURE_VALUE TIMESTAMP(0) WITH TIME ZONE',
+ 'PERIODTS0': 'FEATURE_VALUE PERIOD(TIMESTAMP(0))',
  'PERIODTS0TZ': 'FEATURE_VALUE PERIOD(TIMESTAMP(0) WITH TIME ZONE)',
- 'DECIMAL' : 'FEATURE_VALUE DECIMAL(38,19)'
+ 'DECIMAL': 'FEATURE_VALUE DECIMAL(38,19)'
  }

  # Construct the column definitions for the table based on the entity ID
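The 'Example Usage' block removed from the docstring above still describes how the function is called; for reference, a minimal sketch, assuming an active teradataml connection and a configured `tdfs4ds.SCHEMA`:

# Sketch based on the example formerly in the docstring; names are illustrative.
entity_id_dict = {'customer_id': 'INTEGER'}   # entity key column -> SQL type
table_name = feature_store_table_creation(entity_id_dict, 'FLOAT')
print(f"Feature store table {table_name} created successfully.")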
@@ -196,12 +167,14 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
  # SQL query to create the feature store table
  if feature_type.lower() == 'ref':
  partitioning = partitioning.replace('"', "'")
- partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),','')
+ partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),', '')
  partitioning = partitioning.replace(
  f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH})',
- '')
+ ''
+ )
  substr = extract_partition_content(partitioning.upper())
- if len(substr)==0: partitioning = ''
+ if len(substr) == 0:
+ partitioning = ''
  query = f"""
  CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
  FALLBACK,
@@ -217,7 +190,7 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
  {partitioning};
  """
  else:
- partitioning = partitioning.replace('"',"'")
+ partitioning = partitioning.replace('"', "'")
  query = f"""
  CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
  FALLBACK,
@@ -266,39 +239,40 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim

  try:
  # Attempt to execute the create table query
- execute_query(query)
- execute_query(query3)
+ execute_query(query, raise_error=True)
+ execute_query(query3, raise_error=True)
  if tdml.display.print_sqlmr_query:
- print(query)
- print(query3)
- if tdfs4ds.DISPLAY_LOGS: print(f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been created')
- #execute_query(query2)
+ logger_safe('info', query)
+ logger_safe('info', query3)
+ logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been created')
+ # execute_query(query2)
  except Exception as e:
- # If the table already exists and if_exists is set to 'replace', drop the table and recreate it
- print(str(e).split('\n')[0])
- if str(e).split('\n')[0].endswith('already exists.') and (if_exists == 'replace'):
- execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}')
- if tdfs4ds.DISPLAY_LOGS: print(f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been dropped')
+ msg = str(e).split('\n')[0]
+ logger_safe('error', msg)
+ if msg.endswith('already exists.') and (if_exists == 'replace'):
+ execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}', raise_error=True)
+ logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been dropped')
  try:
  # Attempt to recreate the table after dropping it
- execute_query(query)
- if tdfs4ds.DISPLAY_LOGS: print(f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been re-created')
+ execute_query(query, raise_error=True)
+ logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been re-created')
  if tdml.display.print_sqlmr_query:
- print(query)
- except Exception as e:
- print(str(e).split('\n')[0])
+ logger_safe('info', query)
+ except Exception as e2:
+ logger_safe('error', str(e2).split('\n')[0])

  try:
  # Attempt to create the view
- execute_query(query_view)
+ execute_query(query_view, raise_error=True)
  if tdml.display.print_sqlmr_query:
- print(query)
- if tdfs4ds.DISPLAY_LOGS: print(f'VIEW {tdfs4ds.SCHEMA}.{view_name} has been created')
+ logger_safe('info', query_view)
+ logger_safe('info', f'VIEW {tdfs4ds.SCHEMA}.{view_name} has been created')
  except Exception as e:
- print(str(e).split('\n')[0])
+ logger_safe('error', str(e).split('\n')[0])

  return table_name

+
  def register_features(entity_id, feature_names_types, primary_index = None, partitioning = ''):
  """
  Orchestrates the registration or update of feature definitions in a Teradata database's feature catalog.
@@ -350,46 +324,47 @@ def register_features(entity_id, feature_names_types, primary_index = None, part

  def _register_features_merge(entity_id, feature_names_types, primary_index=None, partitioning=''):
  """
- Registers or updates feature definitions in a Teradata database's feature catalog, associating entity identifiers
- with feature names, types, and other metadata. This function prepares and executes SQL operations to insert new
- feature definitions or update existing ones, considering partitioning strategies and primary index configurations.
-
- Parameters:
- - entity_id (dict): Specifies the entity's identifiers with keys representing attribute names. This dictionary
- is crucial for defining the scope and granularity of feature data.
- - feature_names_types (dict): Maps feature names to their properties, including data types and unique identifiers.
- Each value is a dictionary with keys 'type' and 'id' indicating the feature's data
- type and a unique identifier, respectively.
- - primary_index (list, optional): Identifies the primary index column(s) for the feature data. This influences
- the organization and performance of database operations. If not specified,
- defaults are used based on the entity_id structure.
- - partitioning (str, optional): Describes the partitioning strategy through a string listing column names used
- for partitioning. This can impact data storage and retrieval performance.
-
- Returns:
- pd.DataFrame: Contains details of the registered features, including names, types, IDs, and references to the
- respective feature store table and view names, alongside metadata about the entity and database schema.
-
- Note:
- - The function dynamically constructs SQL queries for inserting new features or updating existing ones in the
- feature catalog, adapting to the provided partitioning and primary index settings.
- - Assumes the existence of a Teradata feature catalog table in the specified schema and that the database connection
- is correctly configured.
- - Utilizes the tdfs4ds module for database schema configurations and valid-time temporal table considerations.
-
- Example Usage:
- >>> entity_id = {'customer_id': 'INTEGER'}
- >>> feature_names_types = {'age': {'type': 'BIGINT', 'id': 1}, 'gender': {'type': 'VARCHAR_LATIN', 'id': 2}}
- >>> registered_features = register_features(entity_id, feature_names_types)
- >>> print(registered_features)
-
- This example demonstrates registering features for an entity with attributes customer_id, age, and gender,
- where age and gender features have specified types and unique IDs.
+ Register or update feature definitions in the feature catalog, with temporal support.
+
+ This function builds (or refreshes) entries in the Teradata feature catalog from a
+ mapping of feature names to their metadata, computes the target feature store table
+ and view names, stages the metadata to a temporary table, and executes a MERGE into
+ the catalog (with optional VALIDTIME support based on `tdfs4ds.FEATURE_STORE_TIME`).
+
+ Parameters
+ ----------
+ entity_id : dict[str, Any]
+ Mapping of entity-key column names to types. Only the keys (column names) are
+ required here; values are not used by this function.
+ feature_names_types : dict[str, dict]
+ Dict of feature name -> {"type": <SQL_TYPE>, "id": <int>} describing each
+ feature’s storage type and identifier in the catalog.
+ primary_index : list[str] | None, optional
+ Primary index column(s) to use when deriving the feature store table/view names.
+ If None, defaults are inferred by `get_feature_store_table_name`.
+ partitioning : str, optional
+ Partitioning expression or comma-separated column list used by
+ `get_feature_store_table_name`.
+
+ Returns
+ -------
+ pd.DataFrame
+ A dataframe of the features that were (up)registered, including:
+ FEATURE_NAME, FEATURE_TYPE, FEATURE_ID, FEATURE_TABLE, FEATURE_VIEW,
+ ENTITY_NAME, FEATURE_DATABASE, DATA_DOMAIN.
+
+ Notes
+ -----
+ - When `tdfs4ds.FEATURE_STORE_TIME is None`, uses CURRENT VALIDTIME (non-explicit start/end).
+ Otherwise uses `VALIDTIME PERIOD ('<FEATURE_STORE_TIME>', '<END_PERIOD>')` and adds
+ the valid-time start/end when inserting.
+ - Respects `tdfs4ds.DISPLAY_LOGS` via `logger_safe`.
  """

- if tdfs4ds.FEATURE_STORE_TIME == None:
+ # --- VALIDTIME setup -----------------------------------------------------
+ if tdfs4ds.FEATURE_STORE_TIME is None:
  validtime_statement = 'CURRENT VALIDTIME'
- validtime_start = 'CAST(CURRENT_TIME AS TIMESTAMP(0) WITH TIME ZONE)'
+ validtime_start = "CAST(CURRENT_TIME AS TIMESTAMP(0) WITH TIME ZONE)"
  else:
  validtime_statement = f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{tdfs4ds.END_PERIOD})'"
  validtime_start = f"CAST('{tdfs4ds.FEATURE_STORE_TIME}' AS TIMESTAMP(0) WITH TIME ZONE)"
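The rewritten docstring drops the old doctest-style example, but the call shape is unchanged, so the removed example still applies. A sketch, assuming the feature catalog exists and a teradataml connection is configured:

# Sketch mirroring the example removed from the old docstring.
entity_id = {'customer_id': 'INTEGER'}
feature_names_types = {
    'age':    {'type': 'BIGINT',        'id': 1},
    'gender': {'type': 'VARCHAR_LATIN', 'id': 2},
}
registered_features = register_features(entity_id, feature_names_types)
print(registered_features)   # FEATURE_NAME, FEATURE_TYPE, FEATURE_ID, FEATURE_TABLE, ...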
@@ -399,154 +374,174 @@ def _register_features_merge(entity_id, feature_names_types, primary_index=None,
  else:
  end_period_ = tdfs4ds.END_PERIOD

- if len(list(feature_names_types.keys())) == 0:
- if tdfs4ds.DISPLAY_LOGS: print('no new feature to register')
+ # --- Input checks & early exit ------------------------------------------
+ if not feature_names_types:
+ logger_safe("info", "register_features: no new features to register")
  return

- # Create a comma-separated string of entity IDs
- entity_id_list = list(entity_id.keys())
- entity_id_list.sort()
- ENTITY_ID__ = ','.join([k for k in entity_id_list])
+ # --- Entity columns (ordered, stable) -----------------------------------
+ entity_cols = sorted(list(entity_id.keys()))
+ ENTITY_ID__ = ",".join(entity_cols)

- # Create a DataFrame from the feature_names_types dictionary
- if len(feature_names_types.keys()) > 1:
- df = pd.DataFrame(feature_names_types).transpose().reset_index()
- df.columns = ['FEATURE_NAME', 'FEATURE_TYPE', 'FEATURE_ID']
- else:
- df = pd.DataFrame(columns=['FEATURE_NAME', 'FEATURE_TYPE', 'FEATURE_ID'])
- k = list(feature_names_types.keys())[0]
- df['FEATURE_NAME'] = [k]
- df['FEATURE_TYPE'] = [feature_names_types[k]['type']]
- df['FEATURE_ID'] = [feature_names_types[k]['id']]
-
-
-
- if tdfs4ds.DEBUG_MODE:
- print('register_features', 'primary_index', primary_index)
- print('register_features', 'partitioning', partitioning)
- print('df', df)
-
- # Generate the feature table and view names based on the entity ID and feature type
- df['FEATURE_TABLE'] = df.apply(lambda row: get_feature_store_table_name(entity_id, row.iloc[1],
- primary_index=primary_index,
- partitioning=partitioning)[0],
- axis=1)
- df['FEATURE_VIEW'] = df.apply(lambda row: get_feature_store_table_name(entity_id, row.iloc[1],
- primary_index=primary_index,
- partitioning=partitioning)[1],
- axis=1)
-
- # Add additional columns to the DataFrame
- df['ENTITY_NAME'] = ENTITY_ID__
- df['FEATURE_DATABASE'] = tdfs4ds.SCHEMA
- df['DATA_DOMAIN'] = tdfs4ds.DATA_DOMAIN
-
- # Copy the DataFrame to a temporary table in Teradata
- tdml.copy_to_sql(df, table_name='temp', schema_name=tdfs4ds.SCHEMA, if_exists='replace',
- primary_index='FEATURE_ID',
- types={'FEATURE_ID': tdml.BIGINT})
-
-
-
- if tdfs4ds.DEBUG_MODE:
- print("-----------_register_features_merge - df")
- print(df)
-
- if tdfs4ds.FEATURE_STORE_TIME == None:
+ # --- Build dataframe safely (no transpose tricks) ------------------------
+ rows = []
+ for fname, meta in feature_names_types.items():
+ try:
+ rows.append({
+ "FEATURE_NAME": fname,
+ "FEATURE_TYPE": meta["type"],
+ "FEATURE_ID": meta["id"],
+ })
+ except KeyError as e:
+ logger_safe("error", "register_features: missing key %s in feature '%s' meta=%s", str(e), fname, meta)
+ raise
+
+ df = pd.DataFrame(rows, columns=["FEATURE_NAME", "FEATURE_TYPE", "FEATURE_ID"])
+
+ logger_safe(
+ "debug",
+ "register_features: features_count=%d | entity_cols=%s | primary_index=%s | partitioning=%s",
+ len(df),
+ entity_cols,
+ primary_index,
+ partitioning,
+ )
+
+ # --- Compute feature table & view names ---------------------------------
+ # Use apply to preserve original order; get_feature_store_table_name returns (table, view)
+ df["FEATURE_TABLE"] = df.apply(
+ lambda row: get_feature_store_table_name(
+ entity_id,
+ row["FEATURE_TYPE"],
+ primary_index=primary_index,
+ partitioning=partitioning
+ )[0],
+ axis=1
+ )
+ df["FEATURE_VIEW"] = df.apply(
+ lambda row: get_feature_store_table_name(
+ entity_id,
+ row["FEATURE_TYPE"],
+ primary_index=primary_index,
+ partitioning=partitioning
+ )[1],
+ axis=1
+ )
+
+ # --- Add catalog columns -------------------------------------------------
+ df["ENTITY_NAME"] = ENTITY_ID__
+ df["FEATURE_DATABASE"] = tdfs4ds.SCHEMA
+ df["DATA_DOMAIN"] = tdfs4ds.DATA_DOMAIN
+
+ # --- Stage to temp table -------------------------------------------------
+ tdml.copy_to_sql(
+ df,
+ table_name="temp",
+ schema_name=tdfs4ds.SCHEMA,
+ if_exists="replace",
+ primary_index="FEATURE_ID",
+ types={"FEATURE_ID": tdml.BIGINT},
+ )
+ logger_safe("debug", "register_features: staged %d rows to %s.temp", len(df), tdfs4ds.SCHEMA)
+
+ # --- Build MERGE statement ----------------------------------------------
+ if tdfs4ds.FEATURE_STORE_TIME is None:
+ # no explicit start/end in INSERT branch
  query_merge = f"""
  {validtime_statement}
- MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
+ MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
  USING (
  SELECT
- CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
- , A.FEATURE_NAME
- , A.FEATURE_TYPE
- , A.FEATURE_TABLE
- , A.FEATURE_DATABASE
- , A.FEATURE_VIEW
- , A.ENTITY_NAME
- , A.DATA_DOMAIN
- FROM {tdfs4ds.SCHEMA}.temp A
- LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
- ON A.FEATURE_NAME = B.FEATURE_NAME
- AND A.ENTITY_NAME = B.ENTITY_NAME -- modified
- AND A.DATA_DOMAIN = B.DATA_DOMAIN
- ) UPDATED_FEATURES
- ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
- AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
- AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
- WHEN MATCHED THEN
- UPDATE
- SET
- FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE,
- FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE,
- FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE,
- FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
- --,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
- WHEN NOT MATCHED THEN
- INSERT
+ CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
+ , A.FEATURE_NAME
+ , A.FEATURE_TYPE
+ , A.FEATURE_TABLE
+ , A.FEATURE_DATABASE
+ , A.FEATURE_VIEW
+ , A.ENTITY_NAME
+ , A.DATA_DOMAIN
+ FROM {tdfs4ds.SCHEMA}.temp A
+ LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
+ ON A.FEATURE_NAME = B.FEATURE_NAME
+ AND A.ENTITY_NAME = B.ENTITY_NAME
+ AND A.DATA_DOMAIN = B.DATA_DOMAIN
+ ) UPDATED_FEATURES
+ ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
+ AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
+ AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
+ WHEN MATCHED THEN UPDATE SET
+ FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
+ , FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
+ , FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
+ , FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
+ WHEN NOT MATCHED THEN INSERT
  ( UPDATED_FEATURES.FEATURE_ID
- , UPDATED_FEATURES.FEATURE_NAME
- , UPDATED_FEATURES.FEATURE_TYPE
- , UPDATED_FEATURES.FEATURE_TABLE
- , UPDATED_FEATURES.FEATURE_DATABASE
- , UPDATED_FEATURES.FEATURE_VIEW
- , UPDATED_FEATURES.ENTITY_NAME
- , UPDATED_FEATURES.DATA_DOMAIN
- )
- """
+ , UPDATED_FEATURES.FEATURE_NAME
+ , UPDATED_FEATURES.FEATURE_TYPE
+ , UPDATED_FEATURES.FEATURE_TABLE
+ , UPDATED_FEATURES.FEATURE_DATABASE
+ , UPDATED_FEATURES.FEATURE_VIEW
+ , UPDATED_FEATURES.ENTITY_NAME
+ , UPDATED_FEATURES.DATA_DOMAIN
+ );
+ """
  else:
+ # insert with explicit valid-time start/end
  query_merge = f"""
  {validtime_statement}
- MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
+ MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
  USING (
  SELECT
- CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
- , A.FEATURE_NAME
- , A.FEATURE_TYPE
- , A.FEATURE_TABLE
- , A.FEATURE_DATABASE
- , A.FEATURE_VIEW
- , A.ENTITY_NAME
- , A.DATA_DOMAIN
- FROM {tdfs4ds.SCHEMA}.temp A
- LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
- ON A.FEATURE_NAME = B.FEATURE_NAME
- AND A.ENTITY_NAME = B.ENTITY_NAME -- modified
- AND A.DATA_DOMAIN = B.DATA_DOMAIN
- ) UPDATED_FEATURES
- ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
- AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
- AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
- WHEN MATCHED THEN
- UPDATE
- SET
- FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE,
- FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE,
- FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE,
- FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
- --,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
- WHEN NOT MATCHED THEN
- INSERT
+ CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
+ , A.FEATURE_NAME
+ , A.FEATURE_TYPE
+ , A.FEATURE_TABLE
+ , A.FEATURE_DATABASE
+ , A.FEATURE_VIEW
+ , A.ENTITY_NAME
+ , A.DATA_DOMAIN
+ FROM {tdfs4ds.SCHEMA}.temp A
+ LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
+ ON A.FEATURE_NAME = B.FEATURE_NAME
+ AND A.ENTITY_NAME = B.ENTITY_NAME
+ AND A.DATA_DOMAIN = B.DATA_DOMAIN
+ ) UPDATED_FEATURES
+ ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
+ AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
+ AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
+ WHEN MATCHED THEN UPDATE SET
+ FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
+ , FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
+ , FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
+ , FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
+ WHEN NOT MATCHED THEN INSERT
  ( UPDATED_FEATURES.FEATURE_ID
- , UPDATED_FEATURES.FEATURE_NAME
- , UPDATED_FEATURES.FEATURE_TYPE
- , UPDATED_FEATURES.FEATURE_TABLE
- , UPDATED_FEATURES.FEATURE_DATABASE
- , UPDATED_FEATURES.FEATURE_VIEW
- , UPDATED_FEATURES.ENTITY_NAME
- , UPDATED_FEATURES.DATA_DOMAIN,
- {validtime_start},
- '{end_period_}')
- """
+ , UPDATED_FEATURES.FEATURE_NAME
+ , UPDATED_FEATURES.FEATURE_TYPE
+ , UPDATED_FEATURES.FEATURE_TABLE
+ , UPDATED_FEATURES.FEATURE_DATABASE
+ , UPDATED_FEATURES.FEATURE_VIEW
+ , UPDATED_FEATURES.ENTITY_NAME
+ , UPDATED_FEATURES.DATA_DOMAIN
+ , {validtime_start}
+ , '{end_period_}'
+ );
+ """

- if tdfs4ds.DEBUG_MODE:
- print("-----------_register_features_merge - query_merge")
- print(query_merge)
- # Execute the update and insert queries
+ logger_safe("debug", "register_features: merge_sql_preview=%s", " ".join(query_merge.split())[:400] + " ...")
+
+ # --- Execute MERGE -------------------------------------------------------
  execute_query(query_merge)
+ logger_safe(
+ "info",
+ "register_features: merged %d features into %s.%s",
+ len(df),
+ tdfs4ds.SCHEMA,
+ tdfs4ds.FEATURE_CATALOG_NAME,
+ )

  return df
+
  def _register_features_update_insert(entity_id, feature_names_types, primary_index = None, partitioning = ''):
  """
  Registers or updates feature definitions in a Teradata database's feature catalog, associating entity identifiers
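To make the VALIDTIME branch above concrete, here is what the two strings expand to for sample values of `FEATURE_STORE_TIME` and `END_PERIOD` (illustrative timestamps, not defaults shipped by the package):

# Illustrative expansion of the f-strings above; the timestamps are made up.
FEATURE_STORE_TIME = '2024-01-01 00:00:00'
END_PERIOD = '9999-01-01 00:00:00'

validtime_statement = f"VALIDTIME PERIOD '({FEATURE_STORE_TIME},{END_PERIOD})'"
# -> VALIDTIME PERIOD '(2024-01-01 00:00:00,9999-01-01 00:00:00)'

validtime_start = f"CAST('{FEATURE_STORE_TIME}' AS TIMESTAMP(0) WITH TIME ZONE)"
# -> CAST('2024-01-01 00:00:00' AS TIMESTAMP(0) WITH TIME ZONE)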
@@ -869,39 +864,6 @@ def Gettdtypes(tddf, features_columns, entity_id):
  # Increment the feature ID for the next iteration.
  feature_id += 1

- # # Iterate over the data types of the columns in the DataFrame.
- # for k, v in types.items():
- # # If the column name does not exist in the feature catalog table and is in the list of feature column names...
- # if k.upper() not in [n.upper() for n in existing_features] and k.upper() in [n.upper() for n in features_columns]:
- # # If the data type of the column is integer...
- # if 'int' in str(v.lower()):
- # # Add an entry to the result dictionary for the column name with its data type and new feature ID.
- # res[k] = {'type': 'BIGINT', 'id': feature_id}
- # # If the data type of the column is float...
- # elif 'float' in str(v.lower()):
- # # Add an entry to the result dictionary for the column name with its data type and new feature ID.
- # res[k] = {'type': 'FLOAT', 'id': feature_id}
- # # If the data type of the column is varchar with unicode encoding ...
- # elif 'unicode' in str(v.lower()):
- # res[k] = {'type': 'VARCHAR_UNICODE', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # # If the data type of the column is varchar with unicode encoding ...
- # elif 'latin' in str(v.lower()):
- # res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # elif 'decimal' in str(v.lower()):
- # res[k] = {'type': 'DECIMAL', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # else:
- # res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # # Increment the feature ID for the next iteration.
- # feature_id += 1
-
  # Return the result dictionary.
  return res

@@ -979,36 +941,50 @@ def tdstone2_Gettdtypes(existing_model, entity_id, display_logs=False):
  # Return the dictionary containing feature names, types, and IDs.
  return res

- def delete_feature(feature_name, data_domain=None):
+ def delete_feature(feature_name, entity_id, data_domain=None):
  """
- Delete the values of a specific feature from the feature table within a given data domain.
+ Delete the values of a specific feature for given entities from the feature table
+ within a specified data domain.

  This function constructs and executes two SQL queries against a Teradata database
- to remove a feature specified by its name. The first query retrieves the table name
- where the feature resides, based on the feature name and data domain. The second query
- deletes the feature from the identified table.
+ to remove a feature specified by its name and entity identifiers. The first query
+ retrieves the table name where the feature resides, based on the feature name,
+ entity, and data domain. The second query deletes the feature values from the
+ identified table.

  Parameters:
  - feature_name (str): The name of the feature to be removed.
- - data_domain (str, optional): The data domain where the feature is located. If not specified,
- the function uses the default data domain defined in tdfs4ds.DATA_DOMAIN.
-
- The function checks if the DEBUG_MODE flag in the tdfs4ds module is set to True. If so,
- it prints the SQL queries and the resolved table name for debugging purposes.
+ - entity_id (str or list of str): Entity identifier(s). If a string is provided,
+ it will be converted to a single-element list. The list is always sorted
+ alphabetically before use.
+ - data_domain (str, optional): The data domain where the feature is located.
+ If not specified, the function uses the default data domain defined in
+ `tdfs4ds.DATA_DOMAIN`.
+
+ Behavior:
+ - The function checks if the `DEBUG_MODE` flag in the `tdfs4ds` module is set to True.
+ If so, it prints the generated SQL queries and the resolved table name for debugging.
+ - If the feature table cannot be resolved, the function returns without executing
+ a delete query.

- The function does not return any value.
+ Returns:
+ - None

  Note:
  - The function assumes the presence of a module `tdfs4ds` with predefined constants
- such as `DATA_DOMAIN`, `SCHEMA`, `FEATURE_CATALOG_NAME`, and a flag `DEBUG_MODE`.
+ such as `DATA_DOMAIN`, `SCHEMA`, `FEATURE_CATALOG_NAME_VIEW`, and a flag `DEBUG_MODE`.
  - It also assumes a `tdml` module or object with an `execute_sql` method capable of
  executing SQL queries against a Teradata database and fetching the results.

  Raises:
- - This function might raise exceptions related to SQL execution or connection issues,
- which are not explicitly handled within the function itself.
+ - Exceptions related to SQL execution or connection issues may be raised but are not
+ explicitly handled, except for printing the error message.
  """

+ if isinstance(entity_id, str):
+ entity_id = [entity_id]
+ entity_id = sorted(entity_id)
+
  if data_domain is None:
  data_domain = tdfs4ds.DATA_DOMAIN

@@ -1016,19 +992,21 @@ def delete_feature(feature_name, data_domain=None):
  SEL FEATURE_DATABASE||'.'||FEATURE_TABLE AS TABLE_NAME
  FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
  WHERE FEATURE_NAME = '{feature_name}'
- AND DATA_DOMAIN = '{data_domain}'"""
+ AND DATA_DOMAIN = '{data_domain}'
+ AND ENTITY_NAME = '{','.join([e.upper() for e in entity_id])}'"""
  if tdfs4ds.DEBUG_MODE:
  print(query0)

  table_name = tdml.execute_sql(query0).fetchall()
- if len(table_name)>0:
+ if len(table_name) > 0:
  table_name = table_name[0][0]
  else:
  return
  if tdfs4ds.DEBUG_MODE:
  print('table name : ', table_name)
+
  query = f"""
- DELETE {table_name}
+ NONSEQUENCED VALIDTIME DELETE {table_name}
  WHERE FEATURE_ID = (
  SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
  WHERE FEATURE_NAME = '{feature_name}'
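Given the signature change above, `delete_feature` now requires the entity identifier(s), which are upper-cased, sorted, and matched against ENTITY_NAME in the catalog view. A minimal sketch (feature and column names are placeholders):

# Sketch of the new call shape; 'age', 'tx_amount', and the columns are placeholders.
delete_feature('age', entity_id='customer_id')                      # single entity column
delete_feature('tx_amount', entity_id=['customer_id', 'tx_date'])   # composite entity key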
@@ -1044,6 +1022,7 @@ def delete_feature(feature_name, data_domain=None):

  return

+
  def remove_feature(feature_name, entity_id, data_domain=None):
  """
  Attempts to remove a specific feature from the feature catalog and any associated data,
@@ -1060,7 +1039,9 @@ def remove_feature(feature_name, entity_id, data_domain=None):

  Parameters:
  - feature_name (str): The name of the feature to be removed.
- - entity_id (list of str): A list of entity identifiers associated with the feature.
+ - entity_id (str or list of str): Entity identifier(s). If a string is provided,
+ it will be converted to a single-element list. The list is always sorted
+ alphabetically before use.
  - data_domain (str, optional): The data domain where the feature is located. If not provided,
  the function uses the default data domain from the `tdfs4ds.DATA_DOMAIN` setting.

@@ -1084,16 +1065,19 @@ def remove_feature(feature_name, entity_id, data_domain=None):
  - SQL execution or connection exceptions might occur but are not explicitly handled by this function.
  """

+ if isinstance(entity_id, str):
+ entity_id = [entity_id]
+ entity_id = sorted(entity_id)
+
  if data_domain is None:
  data_domain = tdfs4ds.DATA_DOMAIN

  try:
- delete_feature(feature_name, data_domain)
+ delete_feature(feature_name, entity_id, data_domain)
  except Exception as e:
  print(str(e).split('\n')[0])
  return

- entity_id.sort()
  query = f"""
  NONSEQUENCED VALIDTIME DELETE {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME}
  WHERE FEATURE_NAME = '{feature_name}'
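Correspondingly, `remove_feature` now forwards the normalized `entity_id` to `delete_feature` before deleting the catalog row. A sketch with placeholder names:

# Sketch; 'age' and 'customer_id' are placeholders. data_domain falls back to
# tdfs4ds.DATA_DOMAIN when omitted.
remove_feature('age', entity_id='customer_id')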
@@ -1102,7 +1086,6 @@ def remove_feature(feature_name, entity_id, data_domain=None):
  """
  if tdfs4ds.DEBUG_MODE:
  print(query)
+
  tdml.execute_sql(query)
  return
-
-