tdfs4ds 0.2.4.25__py3-none-any.whl → 0.2.4.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ import pandas as pd
  import tqdm
  import inspect
  import re
+ from tdfs4ds import logger_safe, logger

  @execute_query_wrapper
  def feature_store_catalog_view_creation():
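The new `logger_safe` import above is the thread running through this release: DISPLAY_LOGS-guarded print() calls are replaced by leveled logging. The helper's implementation is not part of this diff; purely as a hedged sketch of a wrapper with the call shape used below (a level string, then a message with optional printf-style arguments), it could look like this — not the package's actual code:

# Hypothetical sketch only; the real logger_safe ships with tdfs4ds and may
# also honor tdfs4ds.DISPLAY_LOGS, as the new docstrings state.
import logging

logger = logging.getLogger("tdfs4ds")

def logger_safe(level, msg, *args):
    """Forward to the module logger without ever raising from the logging path."""
    try:
        getattr(logger, level, logger.info)(msg, *args)
    except Exception:
        pass  # logging must not break feature-store operations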
@@ -129,57 +130,27 @@ def feature_store_catalog_creation(if_exists='replace', comment='this table is a
  return tdfs4ds.FEATURE_CATALOG_NAME


- def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index = None, partitioning = ''):
+ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', primary_index=None, partitioning=''):
  """
  Creates a table and a corresponding view for feature storage in a Teradata database schema, based on specified entity ID and feature type.
-
- This function automates the creation of a table and view tailored for storing features in a structured manner. It leverages provided entity identifiers and feature types to generate table and view names dynamically, integrating with an existing feature catalog for consistency and reference. The table and view are created with considerations for primary indexing and optional partitioning strategies to optimize data management and access.
-
- Parameters:
- - entity_id (dict): Maps column names to their respective data types, defining the structure of the entity identifier(s).
- - feature_type (str): Specifies the data type of the feature (e.g., 'FLOAT', 'BIGINT', 'VARCHAR_LATIN', 'VARCHAR_UNICODE').
- - if_exists (str, optional): Determines the action if the table already exists. Options include:
- 'fail' (default), which raises an error; and 'replace', which drops the existing table and creates a new one.
- - primary_index (list, optional): Specifies the columns to be used as the primary index for the table. Enhances data retrieval performance.
- - partitioning (str, optional): SQL clause to define table partitioning. Aids in managing large datasets efficiently.
-
- Returns:
- str: The name of the newly created feature store table.
-
- Note:
- - Utilizes default schema and feature catalog names as defined in the tdfs4ds module.
- - The primary index typically includes the entity ID, feature ID, and feature version for optimal data organization.
- - A secondary index on the feature ID facilitates efficient querying.
- - Corresponding views offer a snapshot of the current valid-time features, simplifying temporal queries.
- - Existing tables are handled based on the 'if_exists' parameter, with support for replacing or retaining the tables.
- - Assumes necessary database access and permissions are available for table and view creation.
-
- Example Usage:
- >>> entity_id_dict = {'customer_id': 'INTEGER'}
- >>> table_name = feature_store_table_creation(entity_id_dict, 'FLOAT')
- >>> print(f"Feature store table {table_name} created successfully.")
  """
-
-
- table_name, view_name = get_feature_store_table_name(entity_id, feature_type, primary_index = primary_index, partitioning = partitioning)
- if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() ==table_name.lower()]) > 0:
- if tdfs4ds.DISPLAY_LOGS:
- print(f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
+ table_name, view_name = get_feature_store_table_name(entity_id, feature_type, primary_index=primary_index, partitioning=partitioning)
+ if len([t for t in tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA).TableName if t.lower() == table_name.lower()]) > 0:
+ logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database already exists. No need to create it.')
  return table_name
  else:
- if tdfs4ds.DISPLAY_LOGS:
- print(f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')
+ logger_safe('info', f'table {table_name} in the {tdfs4ds.SCHEMA} database does not exists. Need to create it.')

  query_feature_value = {
  'FLOAT': 'FEATURE_VALUE FLOAT',
  'BIGINT': 'FEATURE_VALUE BIGINT',
  'VARCHAR_LATIN': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET LATIN',
  'VARCHAR_UNICODE': f'FEATURE_VALUE VARCHAR({tdfs4ds.VARCHAR_SIZE}) CHARACTER SET UNICODE',
- 'TIMESTAMP0' : 'FEATURE_VALUE TIMESTAMP(0)',
- 'TIMESTAMP0TZ' : 'FEATURE_VALUE TIMESTAMP(0) WITH TIME ZONE',
- 'PERIODTS0' : 'FEATURE_VALUE PERIOD(TIMESTAMP(0))',
+ 'TIMESTAMP0': 'FEATURE_VALUE TIMESTAMP(0)',
+ 'TIMESTAMP0TZ': 'FEATURE_VALUE TIMESTAMP(0) WITH TIME ZONE',
+ 'PERIODTS0': 'FEATURE_VALUE PERIOD(TIMESTAMP(0))',
  'PERIODTS0TZ': 'FEATURE_VALUE PERIOD(TIMESTAMP(0) WITH TIME ZONE)',
- 'DECIMAL' : 'FEATURE_VALUE DECIMAL(38,19)'
+ 'DECIMAL': 'FEATURE_VALUE DECIMAL(38,19)'
  }

  # Construct the column definitions for the table based on the entity ID
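The 'Example Usage' block removed from the docstring above still describes how the function is called; for reference, a minimal sketch, assuming an active teradataml connection and a configured `tdfs4ds.SCHEMA`:

# Sketch based on the example formerly in the docstring; names are illustrative.
entity_id_dict = {'customer_id': 'INTEGER'}   # entity key column -> SQL type
table_name = feature_store_table_creation(entity_id_dict, 'FLOAT')
print(f"Feature store table {table_name} created successfully.")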
@@ -196,12 +167,14 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
  # SQL query to create the feature store table
  if feature_type.lower() == 'ref':
  partitioning = partitioning.replace('"', "'")
- partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),','')
+ partitioning = partitioning.replace(f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH}),', '')
  partitioning = partitioning.replace(
  f'RANGE_N(FEATURE_ID BETWEEN 0 AND {tdfs4ds.FEATURE_PARTITION_N} EACH {tdfs4ds.FEATURE_PARTITION_EACH})',
- '')
+ ''
+ )
  substr = extract_partition_content(partitioning.upper())
- if len(substr)==0: partitioning = ''
+ if len(substr) == 0:
+ partitioning = ''
  query = f"""
  CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
  FALLBACK,
@@ -217,7 +190,7 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim
  {partitioning};
  """
  else:
- partitioning = partitioning.replace('"',"'")
+ partitioning = partitioning.replace('"', "'")
  query = f"""
  CREATE MULTISET TABLE {tdfs4ds.SCHEMA}.{table_name},
  FALLBACK,
@@ -266,39 +239,40 @@ def feature_store_table_creation(entity_id, feature_type, if_exists='fail', prim

  try:
  # Attempt to execute the create table query
- execute_query(query)
- execute_query(query3)
+ execute_query(query, raise_error=True)
+ execute_query(query3, raise_error=True)
  if tdml.display.print_sqlmr_query:
- print(query)
- print(query3)
- if tdfs4ds.DISPLAY_LOGS: print(f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been created')
- #execute_query(query2)
+ logger_safe('info', query)
+ logger_safe('info', query3)
+ logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been created')
+ # execute_query(query2)
  except Exception as e:
- # If the table already exists and if_exists is set to 'replace', drop the table and recreate it
- print(str(e).split('\n')[0])
- if str(e).split('\n')[0].endswith('already exists.') and (if_exists == 'replace'):
- execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}')
- if tdfs4ds.DISPLAY_LOGS: print(f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been dropped')
+ msg = str(e).split('\n')[0]
+ logger_safe('error', msg)
+ if msg.endswith('already exists.') and (if_exists == 'replace'):
+ execute_query(f'DROP TABLE {tdfs4ds.SCHEMA}.{table_name}', raise_error=True)
+ logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been dropped')
  try:
  # Attempt to recreate the table after dropping it
- execute_query(query)
- if tdfs4ds.DISPLAY_LOGS: print(f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been re-created')
+ execute_query(query, raise_error=True)
+ logger_safe('info', f'TABLE {tdfs4ds.SCHEMA}.{table_name} has been re-created')
  if tdml.display.print_sqlmr_query:
- print(query)
- except Exception as e:
- print(str(e).split('\n')[0])
+ logger_safe('info', query)
+ except Exception as e2:
+ logger_safe('error', str(e2).split('\n')[0])

  try:
  # Attempt to create the view
- execute_query(query_view)
+ execute_query(query_view, raise_error=True)
  if tdml.display.print_sqlmr_query:
- print(query)
- if tdfs4ds.DISPLAY_LOGS: print(f'VIEW {tdfs4ds.SCHEMA}.{view_name} has been created')
+ logger_safe('info', query_view)
+ logger_safe('info', f'VIEW {tdfs4ds.SCHEMA}.{view_name} has been created')
  except Exception as e:
- print(str(e).split('\n')[0])
+ logger_safe('error', str(e).split('\n')[0])

  return table_name

+
  def register_features(entity_id, feature_names_types, primary_index = None, partitioning = ''):
  """
  Orchestrates the registration or update of feature definitions in a Teradata database's feature catalog.
@@ -350,46 +324,47 @@ def register_features(entity_id, feature_names_types, primary_index = None, part

  def _register_features_merge(entity_id, feature_names_types, primary_index=None, partitioning=''):
  """
- Registers or updates feature definitions in a Teradata database's feature catalog, associating entity identifiers
- with feature names, types, and other metadata. This function prepares and executes SQL operations to insert new
- feature definitions or update existing ones, considering partitioning strategies and primary index configurations.
-
- Parameters:
- - entity_id (dict): Specifies the entity's identifiers with keys representing attribute names. This dictionary
- is crucial for defining the scope and granularity of feature data.
- - feature_names_types (dict): Maps feature names to their properties, including data types and unique identifiers.
- Each value is a dictionary with keys 'type' and 'id' indicating the feature's data
- type and a unique identifier, respectively.
- - primary_index (list, optional): Identifies the primary index column(s) for the feature data. This influences
- the organization and performance of database operations. If not specified,
- defaults are used based on the entity_id structure.
- - partitioning (str, optional): Describes the partitioning strategy through a string listing column names used
- for partitioning. This can impact data storage and retrieval performance.
-
- Returns:
- pd.DataFrame: Contains details of the registered features, including names, types, IDs, and references to the
- respective feature store table and view names, alongside metadata about the entity and database schema.
-
- Note:
- - The function dynamically constructs SQL queries for inserting new features or updating existing ones in the
- feature catalog, adapting to the provided partitioning and primary index settings.
- - Assumes the existence of a Teradata feature catalog table in the specified schema and that the database connection
- is correctly configured.
- - Utilizes the tdfs4ds module for database schema configurations and valid-time temporal table considerations.
-
- Example Usage:
- >>> entity_id = {'customer_id': 'INTEGER'}
- >>> feature_names_types = {'age': {'type': 'BIGINT', 'id': 1}, 'gender': {'type': 'VARCHAR_LATIN', 'id': 2}}
- >>> registered_features = register_features(entity_id, feature_names_types)
- >>> print(registered_features)
-
- This example demonstrates registering features for an entity with attributes customer_id, age, and gender,
- where age and gender features have specified types and unique IDs.
+ Register or update feature definitions in the feature catalog, with temporal support.
+
+ This function builds (or refreshes) entries in the Teradata feature catalog from a
+ mapping of feature names to their metadata, computes the target feature store table
+ and view names, stages the metadata to a temporary table, and executes a MERGE into
+ the catalog (with optional VALIDTIME support based on `tdfs4ds.FEATURE_STORE_TIME`).
+
+ Parameters
+ ----------
+ entity_id : dict[str, Any]
+ Mapping of entity-key column names to types. Only the keys (column names) are
+ required here; values are not used by this function.
+ feature_names_types : dict[str, dict]
+ Dict of feature name -> {"type": <SQL_TYPE>, "id": <int>} describing each
+ feature’s storage type and identifier in the catalog.
+ primary_index : list[str] | None, optional
+ Primary index column(s) to use when deriving the feature store table/view names.
+ If None, defaults are inferred by `get_feature_store_table_name`.
+ partitioning : str, optional
+ Partitioning expression or comma-separated column list used by
+ `get_feature_store_table_name`.
+
+ Returns
+ -------
+ pd.DataFrame
+ A dataframe of the features that were (up)registered, including:
+ FEATURE_NAME, FEATURE_TYPE, FEATURE_ID, FEATURE_TABLE, FEATURE_VIEW,
+ ENTITY_NAME, FEATURE_DATABASE, DATA_DOMAIN.
+
+ Notes
+ -----
+ - When `tdfs4ds.FEATURE_STORE_TIME is None`, uses CURRENT VALIDTIME (non-explicit start/end).
+ Otherwise uses `VALIDTIME PERIOD ('<FEATURE_STORE_TIME>', '<END_PERIOD>')` and adds
+ the valid-time start/end when inserting.
+ - Respects `tdfs4ds.DISPLAY_LOGS` via `logger_safe`.
  """

- if tdfs4ds.FEATURE_STORE_TIME == None:
+ # --- VALIDTIME setup -----------------------------------------------------
+ if tdfs4ds.FEATURE_STORE_TIME is None:
  validtime_statement = 'CURRENT VALIDTIME'
- validtime_start = 'CAST(CURRENT_TIME AS TIMESTAMP(0) WITH TIME ZONE)'
+ validtime_start = "CAST(CURRENT_TIME AS TIMESTAMP(0) WITH TIME ZONE)"
  else:
  validtime_statement = f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{tdfs4ds.END_PERIOD})'"
  validtime_start = f"CAST('{tdfs4ds.FEATURE_STORE_TIME}' AS TIMESTAMP(0) WITH TIME ZONE)"
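The rewritten docstring drops the old doctest-style example, but the call shape is unchanged, so the removed example still applies. A sketch, assuming the feature catalog exists and a teradataml connection is configured:

# Sketch mirroring the example removed from the old docstring.
entity_id = {'customer_id': 'INTEGER'}
feature_names_types = {
    'age':    {'type': 'BIGINT',        'id': 1},
    'gender': {'type': 'VARCHAR_LATIN', 'id': 2},
}
registered_features = register_features(entity_id, feature_names_types)
print(registered_features)   # FEATURE_NAME, FEATURE_TYPE, FEATURE_ID, FEATURE_TABLE, ...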
@@ -399,154 +374,174 @@ def _register_features_merge(entity_id, feature_names_types, primary_index=None,
  else:
  end_period_ = tdfs4ds.END_PERIOD

- if len(list(feature_names_types.keys())) == 0:
- if tdfs4ds.DISPLAY_LOGS: print('no new feature to register')
+ # --- Input checks & early exit ------------------------------------------
+ if not feature_names_types:
+ logger_safe("info", "register_features: no new features to register")
  return

- # Create a comma-separated string of entity IDs
- entity_id_list = list(entity_id.keys())
- entity_id_list.sort()
- ENTITY_ID__ = ','.join([k for k in entity_id_list])
+ # --- Entity columns (ordered, stable) -----------------------------------
+ entity_cols = sorted(list(entity_id.keys()))
+ ENTITY_ID__ = ",".join(entity_cols)

- # Create a DataFrame from the feature_names_types dictionary
- if len(feature_names_types.keys()) > 1:
- df = pd.DataFrame(feature_names_types).transpose().reset_index()
- df.columns = ['FEATURE_NAME', 'FEATURE_TYPE', 'FEATURE_ID']
- else:
- df = pd.DataFrame(columns=['FEATURE_NAME', 'FEATURE_TYPE', 'FEATURE_ID'])
- k = list(feature_names_types.keys())[0]
- df['FEATURE_NAME'] = [k]
- df['FEATURE_TYPE'] = [feature_names_types[k]['type']]
- df['FEATURE_ID'] = [feature_names_types[k]['id']]
-
-
-
- if tdfs4ds.DEBUG_MODE:
- print('register_features', 'primary_index', primary_index)
- print('register_features', 'partitioning', partitioning)
- print('df', df)
-
- # Generate the feature table and view names based on the entity ID and feature type
- df['FEATURE_TABLE'] = df.apply(lambda row: get_feature_store_table_name(entity_id, row.iloc[1],
- primary_index=primary_index,
- partitioning=partitioning)[0],
- axis=1)
- df['FEATURE_VIEW'] = df.apply(lambda row: get_feature_store_table_name(entity_id, row.iloc[1],
- primary_index=primary_index,
- partitioning=partitioning)[1],
- axis=1)
-
- # Add additional columns to the DataFrame
- df['ENTITY_NAME'] = ENTITY_ID__
- df['FEATURE_DATABASE'] = tdfs4ds.SCHEMA
- df['DATA_DOMAIN'] = tdfs4ds.DATA_DOMAIN
-
- # Copy the DataFrame to a temporary table in Teradata
- tdml.copy_to_sql(df, table_name='temp', schema_name=tdfs4ds.SCHEMA, if_exists='replace',
- primary_index='FEATURE_ID',
- types={'FEATURE_ID': tdml.BIGINT})
-
-
-
- if tdfs4ds.DEBUG_MODE:
- print("-----------_register_features_merge - df")
- print(df)
-
- if tdfs4ds.FEATURE_STORE_TIME == None:
+ # --- Build dataframe safely (no transpose tricks) ------------------------
+ rows = []
+ for fname, meta in feature_names_types.items():
+ try:
+ rows.append({
+ "FEATURE_NAME": fname,
+ "FEATURE_TYPE": meta["type"],
+ "FEATURE_ID": meta["id"],
+ })
+ except KeyError as e:
+ logger_safe("error", "register_features: missing key %s in feature '%s' meta=%s", str(e), fname, meta)
+ raise
+
+ df = pd.DataFrame(rows, columns=["FEATURE_NAME", "FEATURE_TYPE", "FEATURE_ID"])
+
+ logger_safe(
+ "debug",
+ "register_features: features_count=%d | entity_cols=%s | primary_index=%s | partitioning=%s",
+ len(df),
+ entity_cols,
+ primary_index,
+ partitioning,
+ )
+
+ # --- Compute feature table & view names ---------------------------------
+ # Use apply to preserve original order; get_feature_store_table_name returns (table, view)
+ df["FEATURE_TABLE"] = df.apply(
+ lambda row: get_feature_store_table_name(
+ entity_id,
+ row["FEATURE_TYPE"],
+ primary_index=primary_index,
+ partitioning=partitioning
+ )[0],
+ axis=1
+ )
+ df["FEATURE_VIEW"] = df.apply(
+ lambda row: get_feature_store_table_name(
+ entity_id,
+ row["FEATURE_TYPE"],
+ primary_index=primary_index,
+ partitioning=partitioning
+ )[1],
+ axis=1
+ )
+
+ # --- Add catalog columns -------------------------------------------------
+ df["ENTITY_NAME"] = ENTITY_ID__
+ df["FEATURE_DATABASE"] = tdfs4ds.SCHEMA
+ df["DATA_DOMAIN"] = tdfs4ds.DATA_DOMAIN
+
+ # --- Stage to temp table -------------------------------------------------
+ tdml.copy_to_sql(
+ df,
+ table_name="temp",
+ schema_name=tdfs4ds.SCHEMA,
+ if_exists="replace",
+ primary_index="FEATURE_ID",
+ types={"FEATURE_ID": tdml.BIGINT},
+ )
+ logger_safe("debug", "register_features: staged %d rows to %s.temp", len(df), tdfs4ds.SCHEMA)
+
+ # --- Build MERGE statement ----------------------------------------------
+ if tdfs4ds.FEATURE_STORE_TIME is None:
+ # no explicit start/end in INSERT branch
  query_merge = f"""
  {validtime_statement}
- MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
+ MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
  USING (
  SELECT
- CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
- , A.FEATURE_NAME
- , A.FEATURE_TYPE
- , A.FEATURE_TABLE
- , A.FEATURE_DATABASE
- , A.FEATURE_VIEW
- , A.ENTITY_NAME
- , A.DATA_DOMAIN
- FROM {tdfs4ds.SCHEMA}.temp A
- LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
- ON A.FEATURE_NAME = B.FEATURE_NAME
- AND A.ENTITY_NAME = B.ENTITY_NAME -- modified
- AND A.DATA_DOMAIN = B.DATA_DOMAIN
- ) UPDATED_FEATURES
- ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
- AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
- AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
- WHEN MATCHED THEN
- UPDATE
- SET
- FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE,
- FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE,
- FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE,
- FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
- --,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
- WHEN NOT MATCHED THEN
- INSERT
+ CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
+ , A.FEATURE_NAME
+ , A.FEATURE_TYPE
+ , A.FEATURE_TABLE
+ , A.FEATURE_DATABASE
+ , A.FEATURE_VIEW
+ , A.ENTITY_NAME
+ , A.DATA_DOMAIN
+ FROM {tdfs4ds.SCHEMA}.temp A
+ LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
+ ON A.FEATURE_NAME = B.FEATURE_NAME
+ AND A.ENTITY_NAME = B.ENTITY_NAME
+ AND A.DATA_DOMAIN = B.DATA_DOMAIN
+ ) UPDATED_FEATURES
+ ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
+ AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
+ AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
+ WHEN MATCHED THEN UPDATE SET
+ FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
+ , FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
+ , FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
+ , FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
+ WHEN NOT MATCHED THEN INSERT
  ( UPDATED_FEATURES.FEATURE_ID
- , UPDATED_FEATURES.FEATURE_NAME
- , UPDATED_FEATURES.FEATURE_TYPE
- , UPDATED_FEATURES.FEATURE_TABLE
- , UPDATED_FEATURES.FEATURE_DATABASE
- , UPDATED_FEATURES.FEATURE_VIEW
- , UPDATED_FEATURES.ENTITY_NAME
- , UPDATED_FEATURES.DATA_DOMAIN
- )
- """
+ , UPDATED_FEATURES.FEATURE_NAME
+ , UPDATED_FEATURES.FEATURE_TYPE
+ , UPDATED_FEATURES.FEATURE_TABLE
+ , UPDATED_FEATURES.FEATURE_DATABASE
+ , UPDATED_FEATURES.FEATURE_VIEW
+ , UPDATED_FEATURES.ENTITY_NAME
+ , UPDATED_FEATURES.DATA_DOMAIN
+ );
+ """
  else:
+ # insert with explicit valid-time start/end
  query_merge = f"""
  {validtime_statement}
- MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
+ MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
  USING (
  SELECT
- CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
- , A.FEATURE_NAME
- , A.FEATURE_TYPE
- , A.FEATURE_TABLE
- , A.FEATURE_DATABASE
- , A.FEATURE_VIEW
- , A.ENTITY_NAME
- , A.DATA_DOMAIN
- FROM {tdfs4ds.SCHEMA}.temp A
- LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
- ON A.FEATURE_NAME = B.FEATURE_NAME
- AND A.ENTITY_NAME = B.ENTITY_NAME -- modified
- AND A.DATA_DOMAIN = B.DATA_DOMAIN
- ) UPDATED_FEATURES
- ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
- AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
- AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
- WHEN MATCHED THEN
- UPDATE
- SET
- FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE,
- FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE,
- FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE,
- FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
- --,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
- WHEN NOT MATCHED THEN
- INSERT
+ CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
+ , A.FEATURE_NAME
+ , A.FEATURE_TYPE
+ , A.FEATURE_TABLE
+ , A.FEATURE_DATABASE
+ , A.FEATURE_VIEW
+ , A.ENTITY_NAME
+ , A.DATA_DOMAIN
+ FROM {tdfs4ds.SCHEMA}.temp A
+ LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
+ ON A.FEATURE_NAME = B.FEATURE_NAME
+ AND A.ENTITY_NAME = B.ENTITY_NAME
+ AND A.DATA_DOMAIN = B.DATA_DOMAIN
+ ) UPDATED_FEATURES
+ ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
+ AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
+ AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
+ WHEN MATCHED THEN UPDATE SET
+ FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
+ , FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
+ , FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
+ , FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
+ WHEN NOT MATCHED THEN INSERT
  ( UPDATED_FEATURES.FEATURE_ID
- , UPDATED_FEATURES.FEATURE_NAME
- , UPDATED_FEATURES.FEATURE_TYPE
- , UPDATED_FEATURES.FEATURE_TABLE
- , UPDATED_FEATURES.FEATURE_DATABASE
- , UPDATED_FEATURES.FEATURE_VIEW
- , UPDATED_FEATURES.ENTITY_NAME
- , UPDATED_FEATURES.DATA_DOMAIN,
- {validtime_start},
- '{end_period_}')
- """
+ , UPDATED_FEATURES.FEATURE_NAME
+ , UPDATED_FEATURES.FEATURE_TYPE
+ , UPDATED_FEATURES.FEATURE_TABLE
+ , UPDATED_FEATURES.FEATURE_DATABASE
+ , UPDATED_FEATURES.FEATURE_VIEW
+ , UPDATED_FEATURES.ENTITY_NAME
+ , UPDATED_FEATURES.DATA_DOMAIN
+ , {validtime_start}
+ , '{end_period_}'
+ );
+ """

- if tdfs4ds.DEBUG_MODE:
- print("-----------_register_features_merge - query_merge")
- print(query_merge)
- # Execute the update and insert queries
+ logger_safe("debug", "register_features: merge_sql_preview=%s", " ".join(query_merge.split())[:400] + " ...")
+
+ # --- Execute MERGE -------------------------------------------------------
  execute_query(query_merge)
+ logger_safe(
+ "info",
+ "register_features: merged %d features into %s.%s",
+ len(df),
+ tdfs4ds.SCHEMA,
+ tdfs4ds.FEATURE_CATALOG_NAME,
+ )

  return df
+
  def _register_features_update_insert(entity_id, feature_names_types, primary_index = None, partitioning = ''):
  """
  Registers or updates feature definitions in a Teradata database's feature catalog, associating entity identifiers
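To make the VALIDTIME branch above concrete, here is what the two strings expand to for sample values of `FEATURE_STORE_TIME` and `END_PERIOD` (illustrative timestamps, not defaults shipped by the package):

# Illustrative expansion of the f-strings above; the timestamps are made up.
FEATURE_STORE_TIME = '2024-01-01 00:00:00'
END_PERIOD = '9999-01-01 00:00:00'

validtime_statement = f"VALIDTIME PERIOD '({FEATURE_STORE_TIME},{END_PERIOD})'"
# -> VALIDTIME PERIOD '(2024-01-01 00:00:00,9999-01-01 00:00:00)'

validtime_start = f"CAST('{FEATURE_STORE_TIME}' AS TIMESTAMP(0) WITH TIME ZONE)"
# -> CAST('2024-01-01 00:00:00' AS TIMESTAMP(0) WITH TIME ZONE)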
@@ -869,39 +864,6 @@ def Gettdtypes(tddf, features_columns, entity_id):
  # Increment the feature ID for the next iteration.
  feature_id += 1

- # # Iterate over the data types of the columns in the DataFrame.
- # for k, v in types.items():
- # # If the column name does not exist in the feature catalog table and is in the list of feature column names...
- # if k.upper() not in [n.upper() for n in existing_features] and k.upper() in [n.upper() for n in features_columns]:
- # # If the data type of the column is integer...
- # if 'int' in str(v.lower()):
- # # Add an entry to the result dictionary for the column name with its data type and new feature ID.
- # res[k] = {'type': 'BIGINT', 'id': feature_id}
- # # If the data type of the column is float...
- # elif 'float' in str(v.lower()):
- # # Add an entry to the result dictionary for the column name with its data type and new feature ID.
- # res[k] = {'type': 'FLOAT', 'id': feature_id}
- # # If the data type of the column is varchar with unicode encoding ...
- # elif 'unicode' in str(v.lower()):
- # res[k] = {'type': 'VARCHAR_UNICODE', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # # If the data type of the column is varchar with unicode encoding ...
- # elif 'latin' in str(v.lower()):
- # res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # elif 'decimal' in str(v.lower()):
- # res[k] = {'type': 'DECIMAL', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # else:
- # res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
- # # Print a message that the data type is not yet managed.
- # # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
- # # Increment the feature ID for the next iteration.
- # feature_id += 1
-
  # Return the result dictionary.
  return res

@@ -979,36 +941,50 @@ def tdstone2_Gettdtypes(existing_model, entity_id, display_logs=False):
  # Return the dictionary containing feature names, types, and IDs.
  return res

- def delete_feature(feature_name, data_domain=None):
+ def delete_feature(feature_name, entity_id, data_domain=None):
  """
- Delete the values of a specific feature from the feature table within a given data domain.
+ Delete the values of a specific feature for given entities from the feature table
+ within a specified data domain.

  This function constructs and executes two SQL queries against a Teradata database
- to remove a feature specified by its name. The first query retrieves the table name
- where the feature resides, based on the feature name and data domain. The second query
- deletes the feature from the identified table.
+ to remove a feature specified by its name and entity identifiers. The first query
+ retrieves the table name where the feature resides, based on the feature name,
+ entity, and data domain. The second query deletes the feature values from the
+ identified table.

  Parameters:
  - feature_name (str): The name of the feature to be removed.
- - data_domain (str, optional): The data domain where the feature is located. If not specified,
- the function uses the default data domain defined in tdfs4ds.DATA_DOMAIN.
-
- The function checks if the DEBUG_MODE flag in the tdfs4ds module is set to True. If so,
- it prints the SQL queries and the resolved table name for debugging purposes.
+ - entity_id (str or list of str): Entity identifier(s). If a string is provided,
+ it will be converted to a single-element list. The list is always sorted
+ alphabetically before use.
+ - data_domain (str, optional): The data domain where the feature is located.
+ If not specified, the function uses the default data domain defined in
+ `tdfs4ds.DATA_DOMAIN`.
+
+ Behavior:
+ - The function checks if the `DEBUG_MODE` flag in the `tdfs4ds` module is set to True.
+ If so, it prints the generated SQL queries and the resolved table name for debugging.
+ - If the feature table cannot be resolved, the function returns without executing
+ a delete query.

- The function does not return any value.
+ Returns:
+ - None

  Note:
  - The function assumes the presence of a module `tdfs4ds` with predefined constants
- such as `DATA_DOMAIN`, `SCHEMA`, `FEATURE_CATALOG_NAME`, and a flag `DEBUG_MODE`.
+ such as `DATA_DOMAIN`, `SCHEMA`, `FEATURE_CATALOG_NAME_VIEW`, and a flag `DEBUG_MODE`.
  - It also assumes a `tdml` module or object with an `execute_sql` method capable of
  executing SQL queries against a Teradata database and fetching the results.

  Raises:
- - This function might raise exceptions related to SQL execution or connection issues,
- which are not explicitly handled within the function itself.
+ - Exceptions related to SQL execution or connection issues may be raised but are not
+ explicitly handled, except for printing the error message.
  """

+ if isinstance(entity_id, str):
+ entity_id = [entity_id]
+ entity_id = sorted(entity_id)
+
  if data_domain is None:
  data_domain = tdfs4ds.DATA_DOMAIN

@@ -1016,19 +992,21 @@ def delete_feature(feature_name, data_domain=None):
  SEL FEATURE_DATABASE||'.'||FEATURE_TABLE AS TABLE_NAME
  FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
  WHERE FEATURE_NAME = '{feature_name}'
- AND DATA_DOMAIN = '{data_domain}'"""
+ AND DATA_DOMAIN = '{data_domain}'
+ AND ENTITY_NAME = '{','.join([e.upper() for e in entity_id])}'"""
  if tdfs4ds.DEBUG_MODE:
  print(query0)

  table_name = tdml.execute_sql(query0).fetchall()
- if len(table_name)>0:
+ if len(table_name) > 0:
  table_name = table_name[0][0]
  else:
  return
  if tdfs4ds.DEBUG_MODE:
  print('table name : ', table_name)
+
  query = f"""
- DELETE {table_name}
+ NONSEQUENCED VALIDTIME DELETE {table_name}
  WHERE FEATURE_ID = (
  SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
  WHERE FEATURE_NAME = '{feature_name}'
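Given the signature change above, `delete_feature` now requires the entity identifier(s), which are upper-cased, sorted, and matched against ENTITY_NAME in the catalog view. A minimal sketch (feature and column names are placeholders):

# Sketch of the new call shape; 'age', 'tx_amount', and the columns are placeholders.
delete_feature('age', entity_id='customer_id')                      # single entity column
delete_feature('tx_amount', entity_id=['customer_id', 'tx_date'])   # composite entity key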
@@ -1044,6 +1022,7 @@ def delete_feature(feature_name, data_domain=None):

  return

+
  def remove_feature(feature_name, entity_id, data_domain=None):
  """
  Attempts to remove a specific feature from the feature catalog and any associated data,
@@ -1060,7 +1039,9 @@ def remove_feature(feature_name, entity_id, data_domain=None):

  Parameters:
  - feature_name (str): The name of the feature to be removed.
- - entity_id (list of str): A list of entity identifiers associated with the feature.
+ - entity_id (str or list of str): Entity identifier(s). If a string is provided,
+ it will be converted to a single-element list. The list is always sorted
+ alphabetically before use.
  - data_domain (str, optional): The data domain where the feature is located. If not provided,
  the function uses the default data domain from the `tdfs4ds.DATA_DOMAIN` setting.

@@ -1084,16 +1065,19 @@ def remove_feature(feature_name, entity_id, data_domain=None):
  - SQL execution or connection exceptions might occur but are not explicitly handled by this function.
  """

+ if isinstance(entity_id, str):
+ entity_id = [entity_id]
+ entity_id = sorted(entity_id)
+
  if data_domain is None:
  data_domain = tdfs4ds.DATA_DOMAIN

  try:
- delete_feature(feature_name, data_domain)
+ delete_feature(feature_name, entity_id, data_domain)
  except Exception as e:
  print(str(e).split('\n')[0])
  return

- entity_id.sort()
  query = f"""
  NONSEQUENCED VALIDTIME DELETE {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME}
  WHERE FEATURE_NAME = '{feature_name}'
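Correspondingly, `remove_feature` now forwards the normalized `entity_id` to `delete_feature` before deleting the catalog row. A sketch with placeholder names:

# Sketch; 'age' and 'customer_id' are placeholders. data_domain falls back to
# tdfs4ds.DATA_DOMAIN when omitted.
remove_feature('age', entity_id='customer_id')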
@@ -1102,7 +1086,6 @@ def remove_feature(feature_name, entity_id, data_domain=None):
  """
  if tdfs4ds.DEBUG_MODE:
  print(query)
+
  tdml.execute_sql(query)
  return
-
-