tdfs4ds 0.2.4.35__py3-none-any.whl → 0.2.4.36__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.2.4.35'
1
+ __version__ = '0.2.4.36'
2
2
  import logging
3
3
 
4
4
  # Setup the logger
@@ -562,6 +562,81 @@ def _upload_features(
562
562
  process_id=None, force_compute=False,
563
563
  force_varchar_length=None
564
564
  ):
565
+ """
566
+ Uploads a set of features into the Feature Store for a given entity.
567
+
568
+ This function registers an entity and its associated features in the feature catalog
569
+ if they are not already defined, prepares the data for ingestion, and stores it in the
570
+ feature store. It also supports incremental feature computation and conditional execution
571
+ depending on prior runs.
572
+
573
+ Parameters
574
+ ----------
575
+ df : pandas.DataFrame
576
+ Input dataframe containing entity keys and feature columns to upload.
577
+ entity_id : str, list, or dict
578
+ Identifier(s) for the entity. Can be:
579
+ - A string (single entity key)
580
+ - A list of key column names
581
+ - A dict mapping column names to data types
582
+ If not a dict, entity metadata is inferred automatically.
583
+ feature_names : list of str
584
+ List of feature column names to upload from `df`.
585
+ feature_versions : dict or int, optional
586
+ Feature version(s). If a single integer is provided, it is applied to all features.
587
+ If a dict is provided, it maps each feature name to its version.
588
+ Default is FEATURE_VERSION_DEFAULT.
589
+ primary_index : str or list, optional
590
+ Primary index to use when storing features in Teradata.
591
+ partitioning : str, optional
592
+ Partitioning clause for feature store tables. Default is ''.
593
+ filtermanager : FilterManager, optional
594
+ If provided, features are built iteratively per filter step.
595
+ entity_null_substitute : dict, optional
596
+ Replacement values for nulls in entity keys.
597
+ Example: {'customer_id': -1}
598
+ process_id : str, optional
599
+ Identifier for the process execution, used for follow-up logging.
600
+ force_compute : bool, optional
601
+ If True, forces recomputation even if the same process_id and timestamp were
602
+ already computed earlier. If False, the computation is skipped when existing
603
+ results are detected. Default is False.
604
+ force_varchar_length : int, optional
605
+ If provided, all VARCHAR feature columns are resized to this length
606
+ before ingestion.
607
+
608
+ Returns
609
+ -------
610
+ pandas.DataFrame or None
611
+ If BUILD_DATASET_AT_UPLOAD is enabled, returns a dataset built from the
612
+ ingested features for validation. Otherwise, returns None.
613
+
614
+ Notes
615
+ -----
616
+ - Uses global tdfs4ds context such as FEATURE_STORE_TIME, RUN_ID, and PROCESS_TYPE.
617
+ - Logs ingestion status in process follow-up tables.
618
+ - Skips ingestion when existing completed results are found unless
619
+ `force_compute=True`.
620
+ - Applies Teradata-optimized storage and statistics collection.
621
+
622
+ Raises
623
+ ------
624
+ ValueError
625
+ If unsupported data types are found (CLOB/BLOB/JSON).
626
+ Exception
627
+ For ingestion failure or storage errors.
628
+
629
+ Example
630
+ -------
631
+ >>> _upload_features(
632
+ ... df=dataframe,
633
+ ... entity_id="customer_id",
634
+ ... feature_names=["age", "credit_score"],
635
+ ... process_id="customer_features_v1",
636
+ ... force_compute=False
637
+ ... )
638
+ """
639
+
565
640
  from tdfs4ds.feature_store.entity_management import register_entity
566
641
  from tdfs4ds.feature_store.feature_store_management import Gettdtypes
567
642
  from tdfs4ds.feature_store.feature_store_management import register_features
@@ -633,6 +708,12 @@ def _upload_features(
633
708
 
634
709
  if filtermanager is None:
635
710
  do_compute = not (process_id and follow_up is not None and follow_up.shape[0] > 0)
711
+ if not do_compute and not force_compute:
712
+ logger_safe(
713
+ "info",
714
+ "Skipping computation for process_id=%s at time %s (already exists, force_compute=False)",
715
+ process_id, tdfs4ds.FEATURE_STORE_TIME
716
+ )
636
717
  if do_compute or force_compute:
637
718
  logger_safe("info", "Beginning feature ingestion for entity=%s", entity_id)
638
719
  tdfs4ds.process_store.process_followup.followup_open(
@@ -670,6 +751,7 @@ def _upload_features(
670
751
  raise
671
752
 
672
753
  else:
754
+
673
755
  logger_safe("info", "FilterManager detected: %s filters to process", filtermanager.nb_filters)
674
756
  something_computed = False
675
757
  for i in tqdm(
@@ -704,6 +786,12 @@ def _upload_features(
704
786
  if follow_up.shape[0] > 0:
705
787
  do_compute = False
706
788
 
789
+ if not do_compute and not force_compute:
790
+ logger_safe(
791
+ "info",
792
+ "Skipping computation for process_id=%s at time %s (already exists, force_compute=False)",
793
+ process_id, tdfs4ds.FEATURE_STORE_TIME
794
+ )
707
795
  if do_compute or force_compute:
708
796
  tdfs4ds.process_store.process_followup.followup_open(
709
797
  run_id = tdfs4ds.RUN_ID,
@@ -1179,41 +1267,50 @@ def upload_tdstone2_scores(model):
1179
1267
  return dataset
1180
1268
 
1181
1269
 
1182
- def roll_out(process_list, time_manager, time_id_start = 1, time_id_end = None):
1270
+ def roll_out(process_list, time_manager, time_id_start = 1, time_id_end = None, force_compute = False, force_display_logs = False):
1183
1271
  """
1184
- Executes a series of processes for each date in a given list, managing the time and logging settings.
1272
+ Executes a series of processes for each date in a given list, managing time, computation settings, and logging.
1185
1273
 
1186
1274
  This function iterates over a range of time steps, updating a TimeManager object with each step, and then
1187
- executes a list of processes for that time step. It also manages the synchronization of time for a feature store
1188
- and disables display logs during its execution.
1275
+ executes a list of processes for that time step. It also manages synchronization of time for the feature store
1276
+ and optionally controls forced computation and log display behavior.
1189
1277
 
1190
1278
  Parameters:
1191
1279
  - process_list (list): A list of process IDs that need to be executed for each time step.
1192
- - time_manager (TimeManager object): An object that manages time-related operations, like updating or retrieving time.
1280
+ - time_manager (TimeManager): An object that manages time-related operations, like updating or retrieving time.
1193
1281
  - time_id_start (int, optional): The starting time step ID. Default is 1.
1194
- - time_id_end (int, optional): The ending time step ID. If None, it will run until the last time step in the time manager.
1282
+ - time_id_end (int, optional): The ending time step ID. If None, it will run until the last time step in the
1283
+ time manager.
1284
+ - force_compute (bool, optional): If True, forces each process to recompute even if previous results exist.
1285
+ Default is False.
1286
+ - force_display_logs (bool, optional): If True, forces log display during the rollout even if global log display
1287
+ is disabled. Default is False.
1195
1288
 
1196
1289
  Side Effects:
1197
- - Sets global variables DISPLAY_LOGS and FEATURE_STORE_TIME.
1290
+ - Temporarily modifies global variables DISPLAY_LOGS, PROCESS_TYPE, RUN_ID, and FEATURE_STORE_TIME.
1291
+ - Restores DISPLAY_LOGS setting after execution.
1198
1292
  - Catches and prints exceptions along with the time step on which they occurred.
1199
1293
 
1200
- This function performs the following steps:
1201
- 1. Disables display logs and sets the process type to 'ROLL_OUT'.
1202
- 2. Iterates over the specified range of time steps.
1203
- 3. Updates the time manager with the current time step.
1204
- 4. Synchronizes the feature store time with the current time step.
1205
- 5. Executes each process in the process list for the current time step.
1206
- 6. Restores the original display log setting after execution.
1294
+ Steps performed:
1295
+ 1. Disables display logs by default unless `force_display_logs` is True.
1296
+ 2. Sets process type to 'ROLL_OUT' and initializes a unique run ID.
1297
+ 3. Iterates over the specified range of time steps.
1298
+ 4. Updates the time manager with the current time step.
1299
+ 5. Synchronizes the feature store time with the current time step.
1300
+ 6. Executes each process in the process list with optional forced computation.
1301
+ 7. Restores original display log settings after completion.
1207
1302
 
1208
1303
  Example:
1209
1304
  >>> process_list = ['process_1', 'process_2']
1210
1305
  >>> time_manager = TimeManager(...)
1211
- >>> roll_out(process_list, time_manager, time_id_start=1, time_id_end=10)
1306
+ >>> roll_out(process_list, time_manager, time_id_start=1, time_id_end=10, force_compute=True, force_display_logs=True)
1212
1307
  """
1213
1308
 
1214
1309
  # Disable display logs
1215
1310
  temp_DISPLAY_LOGS = tdfs4ds.DISPLAY_LOGS
1216
1311
  tdfs4ds.DISPLAY_LOGS = False
1312
+ if force_display_logs:
1313
+ tdfs4ds.DISPLAY_LOGS = True
1217
1314
  PROCESS_TYPE = tdfs4ds.PROCESS_TYPE
1218
1315
  tdfs4ds.PROCESS_TYPE = 'ROLL_OUT'
1219
1316
  tdfs4ds.RUN_ID = str(uuid.uuid4())
@@ -1246,7 +1343,7 @@ def roll_out(process_list, time_manager, time_id_start = 1, time_id_end = None):
1246
1343
  # Execute all processes for this time step
1247
1344
  for proc_id in process_list:
1248
1345
  pbar.set_description(f"Processing {date_} | proc {proc_id}")
1249
- run(process_id=proc_id, force_compute=False)
1346
+ run(process_id=proc_id, force_compute=force_compute)
1250
1347
 
1251
1348
  # Restore settings
1252
1349
  tdfs4ds.DISPLAY_LOGS = temp_DISPLAY_LOGS
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tdfs4ds
3
- Version: 0.2.4.35
3
+ Version: 0.2.4.36
4
4
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
5
5
  Author: Denis Molin
6
6
  Requires-Python: >=3.6
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
2
2
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
3
3
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
4
4
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
5
- tdfs4ds/__init__.py,sha256=n3eGxALMqT_UmwvP_VZ8K0bdKSFAtPhe9bi7Kg0TQtA,55698
5
+ tdfs4ds/__init__.py,sha256=Smyg37GXuUsUkaV1RqG7fhMy6-h6bRkpq7bDG_WGYTs,60077
6
6
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
7
7
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
8
8
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -32,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
32
32
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
33
33
  tdfs4ds/utils/time_management.py,sha256=asIWvK5K81NNwAGqC-9Tv4Timscxyv0vyuPFs01whu0,31461
34
34
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
35
- tdfs4ds-0.2.4.35.dist-info/METADATA,sha256=1gmDbv0lpgEcRd0ucWdSSyfGUTyb0-nCxVoMy9Y8JKk,14326
36
- tdfs4ds-0.2.4.35.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
37
- tdfs4ds-0.2.4.35.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
38
- tdfs4ds-0.2.4.35.dist-info/RECORD,,
35
+ tdfs4ds-0.2.4.36.dist-info/METADATA,sha256=wKUqiK1ohnWfe1ZSrgLq_4OXnyEh4K9I5Jm5PfRkaHs,14326
36
+ tdfs4ds-0.2.4.36.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
37
+ tdfs4ds-0.2.4.36.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
38
+ tdfs4ds-0.2.4.36.dist-info/RECORD,,