tdfs4ds 0.2.4.35__py3-none-any.whl → 0.2.4.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +141 -26
- {tdfs4ds-0.2.4.35.dist-info → tdfs4ds-0.2.4.37.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.35.dist-info → tdfs4ds-0.2.4.37.dist-info}/RECORD +5 -5
- {tdfs4ds-0.2.4.35.dist-info → tdfs4ds-0.2.4.37.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.35.dist-info → tdfs4ds-0.2.4.37.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
@@ -1,5 +1,6 @@
-__version__ = '0.2.4.35'
+__version__ = '0.2.4.37'
 import logging
+import json
 
 # Setup the logger
 logging.basicConfig(

@@ -562,6 +563,81 @@ def _upload_features(
         process_id=None, force_compute=False,
         force_varchar_length=None
 ):
+    """
+    Uploads a set of features into the Feature Store for a given entity.
+
+    This function registers an entity and its associated features in the feature catalog
+    if they are not already defined, prepares the data for ingestion, and stores it in the
+    feature store. It also supports incremental feature computation and conditional execution
+    depending on prior runs.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input dataframe containing entity keys and feature columns to upload.
+    entity_id : str, list, or dict
+        Identifier(s) for the entity. Can be:
+        - A string (single entity key)
+        - A list of key column names
+        - A dict mapping column names to data types
+        If not a dict, entity metadata is inferred automatically.
+    feature_names : list of str
+        List of feature column names to upload from `df`.
+    feature_versions : dict or int, optional
+        Feature version(s). If a single integer is provided, it is applied to all features.
+        If a dict is provided, it maps each feature name to its version.
+        Default is FEATURE_VERSION_DEFAULT.
+    primary_index : str or list, optional
+        Primary index to use when storing features in Teradata.
+    partitioning : str, optional
+        Partitioning clause for feature store tables. Default is ''.
+    filtermanager : FilterManager, optional
+        If provided, features are built iteratively per filter step.
+    entity_null_substitute : dict, optional
+        Replacement values for nulls in entity keys.
+        Example: {'customer_id': -1}
+    process_id : str, optional
+        Identifier for the process execution, used for follow-up logging.
+    force_compute : bool, optional
+        If True, forces recomputation even if the same process_id and timestamp were
+        already computed earlier. If False, the computation is skipped when existing
+        results are detected. Default is False.
+    force_varchar_length : int, optional
+        If provided, all VARCHAR feature columns are resized to this length
+        before ingestion.
+
+    Returns
+    -------
+    pandas.DataFrame or None
+        If BUILD_DATASET_AT_UPLOAD is enabled, returns a dataset built from the
+        ingested features for validation. Otherwise, returns None.
+
+    Notes
+    -----
+    - Uses global tdfs4ds context such as FEATURE_STORE_TIME, RUN_ID, and PROCESS_TYPE.
+    - Logs ingestion status in process follow-up tables.
+    - Skips ingestion when existing completed results are found unless
+      `force_compute=True`.
+    - Applies Teradata-optimized storage and statistics collection.
+
+    Raises
+    ------
+    ValueError
+        If unsupported data types are found (CLOB/BLOB/JSON).
+    Exception
+        For ingestion failure or storage errors.
+
+    Example
+    -------
+    >>> _upload_features(
+    ...     df=dataframe,
+    ...     entity_id="customer_id",
+    ...     feature_names=["age", "credit_score"],
+    ...     process_id="customer_features_v1",
+    ...     force_compute=False
+    ... )
+    """
+
     from tdfs4ds.feature_store.entity_management import register_entity
     from tdfs4ds.feature_store.feature_store_management import Gettdtypes
     from tdfs4ds.feature_store.feature_store_management import register_features

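The new docstring documents three accepted shapes for entity_id. As a rough, hypothetical call sketch based only on that docstring (the dataframe and column names are invented, and a configured Teradata feature store is required for this to actually run):

    import pandas as pd

    # Invented sample data for illustration only.
    df = pd.DataFrame({
        "customer_id": [1, 2, 3],
        "age": [34, 51, 27],
        "credit_score": [710, 640, 590],
    })

    # entity_id may be a string, a list of key columns, or a dict mapping
    # key columns to data types; per the docstring, the dict form bypasses
    # automatic inference of entity metadata.
    _upload_features(
        df=df,
        entity_id={"customer_id": "BIGINT"},
        feature_names=["age", "credit_score"],
        process_id="customer_features_v1",
        force_compute=False,
    )
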
@@ -633,6 +709,12 @@ def _upload_features(
 
     if filtermanager is None:
         do_compute = not (process_id and follow_up is not None and follow_up.shape[0] > 0)
+        if not do_compute and not force_compute:
+            logger_safe(
+                "info",
+                "Skipping computation for process_id=%s at time %s (already exists, force_compute=False)",
+                process_id, tdfs4ds.FEATURE_STORE_TIME
+            )
         if do_compute or force_compute:
             logger_safe("info", "Beginning feature ingestion for entity=%s", entity_id)
             tdfs4ds.process_store.process_followup.followup_open(

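The guard added here only makes the skip visible in the logs; the skip itself still follows from do_compute being False (the same guard appears again below in the FilterManager branch). A standalone sketch of the idempotency pattern, with plain logging and a pandas DataFrame standing in for the process follow-up records — all names here are illustrative, not the package's API:

    import logging
    import pandas as pd

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("sketch")

    def should_compute(process_id, follow_up, force_compute=False):
        # A prior completed run exists if the follow-up table has rows.
        already_done = bool(process_id) and follow_up is not None and follow_up.shape[0] > 0
        if already_done and not force_compute:
            logger.info(
                "Skipping computation for process_id=%s (already exists, force_compute=False)",
                process_id,
            )
            return False
        return True

    prior_runs = pd.DataFrame({"run_id": ["7f3a"]})                    # one recorded run
    assert should_compute("p1", prior_runs) is False                   # skipped, logged
    assert should_compute("p1", prior_runs, force_compute=True) is True  # forced through
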
@@ -670,26 +752,44 @@ def _upload_features(
             raise
 
     else:
+
         logger_safe("info", "FilterManager detected: %s filters to process", filtermanager.nb_filters)
         something_computed = False
-
+        pbar = tqdm(
             range(filtermanager.nb_filters),
             total=filtermanager.nb_filters,
             desc="Applying filters",
             unit="filter",
             leave=False
-        )
+        )
+
+        for i in pbar:
             filter_id = i + 1
             filtermanager.update(filter_id)
 
-            # show which filter is being applied in the bar
             try:
-
-
-
-
-
-
+                pbar.set_description(f"Applying filter {filter_id}/{filtermanager.nb_filters}")
+
+                # Convert datetime columns to string
+                df_bar = filtermanager.display().to_pandas().astype(object)  # avoid conversion issues
+                for col in df_bar.select_dtypes(include=["datetime", "datetimetz"]).columns:
+                    df_bar[col] = df_bar[col].dt.strftime("%Y-%m-%d %H:%M:%S")
+
+                # Convert to JSON object (dict)
+                bar_info = df_bar.iloc[0].to_dict()
+
+                # ---- ADD THIS: handle python date objects ----
+                from datetime import date, datetime
+                for key, value in bar_info.items():
+                    if isinstance(value, (date, datetime)):  # convert date/datetime to string
+                        bar_info[key] = value.strftime("%Y-%m-%d %H:%M:%S")
+                # ----------------------------------------------
+
+                bar_info = str(bar_info)
+                if len(bar_info) > 120:
+                    bar_info = bar_info[:117] + "..."
+                pbar.set_postfix_str(bar_info)
+
             except Exception:
                 # postfix is optional; ignore errors from display() here
                 pass

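The rewritten loop body surfaces the active filter in the tqdm bar. Dates and datetimes are stringified first because the postfix is rendered as plain text and raw date objects print awkwardly, and the whole block sits in a try/except since the postfix is purely cosmetic. A self-contained sketch of the technique with invented sample data (not the package's code):

    from datetime import date, datetime

    import pandas as pd
    from tqdm import tqdm

    filters = pd.DataFrame({
        "as_of_date": pd.to_datetime(["2024-01-01", "2024-02-01"]),
        "segment": ["retail", "corporate"],
    })

    pbar = tqdm(range(len(filters)), desc="Applying filters", unit="filter", leave=False)
    for i in pbar:
        pbar.set_description(f"Applying filter {i + 1}/{len(filters)}")
        row = filters.iloc[[i]].astype(object)        # object dtype avoids conversion issues
        bar_info = row.iloc[0].to_dict()
        for key, value in bar_info.items():
            if isinstance(value, (date, datetime)):   # date/datetime -> plain string
                bar_info[key] = value.strftime("%Y-%m-%d %H:%M:%S")
        text = str(bar_info)
        if len(text) > 120:                           # keep the bar on one line
            text = text[:117] + "..."
        pbar.set_postfix_str(text)
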
@@ -704,6 +804,12 @@ def _upload_features(
             if follow_up.shape[0] > 0:
                 do_compute = False
 
+            if not do_compute and not force_compute:
+                logger_safe(
+                    "info",
+                    "Skipping computation for process_id=%s at time %s (already exists, force_compute=False)",
+                    process_id, tdfs4ds.FEATURE_STORE_TIME
+                )
             if do_compute or force_compute:
                 tdfs4ds.process_store.process_followup.followup_open(
                     run_id = tdfs4ds.RUN_ID,

@@ -1179,41 +1285,50 @@ def upload_tdstone2_scores(model):
     return dataset
 
 
-def roll_out(process_list, time_manager, time_id_start = 1, time_id_end = None):
+def roll_out(process_list, time_manager, time_id_start = 1, time_id_end = None, force_compute = False, force_display_logs = False):
     """
-    Executes a series of processes for each date in a given list, managing
+    Executes a series of processes for each date in a given list, managing time, computation settings, and logging.
 
     This function iterates over a range of time steps, updating a TimeManager object with each step, and then
-    executes a list of processes for that time step. It also manages
-    and
+    executes a list of processes for that time step. It also manages synchronization of time for the feature store
+    and optionally controls forced computation and log display behavior.
 
     Parameters:
     - process_list (list): A list of process IDs that need to be executed for each time step.
-    - time_manager (TimeManager
+    - time_manager (TimeManager): An object that manages time-related operations, like updating or retrieving time.
     - time_id_start (int, optional): The starting time step ID. Default is 1.
-    - time_id_end (int, optional): The ending time step ID. If None, it will run until the last time step in the
+    - time_id_end (int, optional): The ending time step ID. If None, it will run until the last time step in the
+      time manager.
+    - force_compute (bool, optional): If True, forces each process to recompute even if previous results exist.
+      Default is False.
+    - force_display_logs (bool, optional): If True, forces log display during the rollout even if global log display
+      is disabled. Default is False.
 
     Side Effects:
-    -
+    - Temporarily modifies global variables DISPLAY_LOGS, PROCESS_TYPE, RUN_ID, and FEATURE_STORE_TIME.
+    - Restores DISPLAY_LOGS setting after execution.
     - Catches and prints exceptions along with the time step on which they occurred.
 
-
-    1. Disables display logs
-    2.
-    3.
-    4.
-    5.
-    6.
+    Steps performed:
+    1. Disables display logs by default unless `force_display_logs` is True.
+    2. Sets process type to 'ROLL_OUT' and initializes a unique run ID.
+    3. Iterates over the specified range of time steps.
+    4. Updates the time manager with the current time step.
+    5. Synchronizes the feature store time with the current time step.
+    6. Executes each process in the process list with optional forced computation.
+    7. Restores original display log settings after completion.
 
     Example:
     >>> process_list = ['process_1', 'process_2']
    >>> time_manager = TimeManager(...)
-    >>> roll_out(process_list, time_manager, time_id_start=1, time_id_end=10)
+    >>> roll_out(process_list, time_manager, time_id_start=1, time_id_end=10, force_compute=True, force_display_logs=True)
     """
 
     # Disable display logs
     temp_DISPLAY_LOGS = tdfs4ds.DISPLAY_LOGS
     tdfs4ds.DISPLAY_LOGS = False
+    if force_display_logs:
+        tdfs4ds.DISPLAY_LOGS = True
     PROCESS_TYPE = tdfs4ds.PROCESS_TYPE
     tdfs4ds.PROCESS_TYPE = 'ROLL_OUT'
     tdfs4ds.RUN_ID = str(uuid.uuid4())

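roll_out saves DISPLAY_LOGS, overrides it, and restores it at the end of the run. The same pattern could be expressed as a context manager so the restore is explicit even if an unexpected error escapes the loop; a hypothetical refactoring sketch, not part of tdfs4ds:

    from contextlib import contextmanager

    import tdfs4ds

    @contextmanager
    def display_logs(enabled):
        previous = tdfs4ds.DISPLAY_LOGS
        tdfs4ds.DISPLAY_LOGS = enabled
        try:
            yield
        finally:
            tdfs4ds.DISPLAY_LOGS = previous   # restored even if a process raises

    # Usage sketch: logs shown only when force_display_logs is True.
    # with display_logs(enabled=force_display_logs):
    #     ...rollout loop...
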
@@ -1246,7 +1361,7 @@ def roll_out(process_list, time_manager, time_id_start = 1, time_id_end = None):
         # Execute all processes for this time step
         for proc_id in process_list:
             pbar.set_description(f"Processing {date_} | proc {proc_id}")
-            run(process_id=proc_id, force_compute=
+            run(process_id=proc_id, force_compute=force_compute)
 
     # Restore settings
     tdfs4ds.DISPLAY_LOGS = temp_DISPLAY_LOGS

{tdfs4ds-0.2.4.35.dist-info → tdfs4ds-0.2.4.37.dist-info}/RECORD
CHANGED

@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
 tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
 tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
 tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
-tdfs4ds/__init__.py,sha256=
+tdfs4ds/__init__.py,sha256=mrvk5jKmcdYg4waC7bpCHsDGlPb8h1JmSgx3dBlo1Ow,60776
 tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
 tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
 tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576

@@ -32,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
 tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
 tdfs4ds/utils/time_management.py,sha256=asIWvK5K81NNwAGqC-9Tv4Timscxyv0vyuPFs01whu0,31461
 tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
-tdfs4ds-0.2.4.
-tdfs4ds-0.2.4.
-tdfs4ds-0.2.4.
-tdfs4ds-0.2.4.
+tdfs4ds-0.2.4.37.dist-info/METADATA,sha256=kXimo1unejodaf9W0tOBPLaKisjougkurjt5C9IpA6g,14326
+tdfs4ds-0.2.4.37.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+tdfs4ds-0.2.4.37.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
+tdfs4ds-0.2.4.37.dist-info/RECORD,,

{tdfs4ds-0.2.4.35.dist-info → tdfs4ds-0.2.4.37.dist-info}/WHEEL
File without changes

{tdfs4ds-0.2.4.35.dist-info → tdfs4ds-0.2.4.37.dist-info}/top_level.txt
File without changes