PyPI - tdfs4ds - Versions diffs - 0.2.4.26__py3-none-any.whl → 0.2.4.27__py3-none-any.whl - Mend

tdfs4ds 0.2.4.26__py3-none-any.whl → 0.2.4.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

tdfs4ds/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__version__ = '0.2.4.26'
+__version__ = '0.2.4.27'
 import logging
 # Setup the logger
 logging.basicConfig(
@@ -935,6 +935,10 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
     selected_features : dict
         A dictionary where the keys are feature table names, and the values are lists of tuples
         (feature_id, feature_version, feature_name) specifying the features to retrieve.
+        NOTE: feature_version may be either:
+          - a single UUID string, or
+          - a list of dicts like:
+              {"process_id": <UUID>, "process_view_name": <str>}
     view_name : str
         The name of the view to be created in the database.
@@ -1004,6 +1008,24 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
     # Sort the entity ID list for consistent query generation
     list_entity_id.sort()
+    # Helpers
+    import re
+    def _sanitize_identifier(name: str) -> str:
+        # Keep letters, numbers, and underscores; replace others with '_'
+        return re.sub(r'[^0-9A-Za-z_]', '_', name)
+    used_alias_counts = {}  # base_alias -> count
+    def _unique_alias(base: str) -> str:
+        """
+        Ensure alias uniqueness: if base already used, append _2, _3, ...
+        """
+        if base not in used_alias_counts:
+            used_alias_counts[base] = 1
+            return base
+        used_alias_counts[base] += 1
+        return f"{base}_{used_alias_counts[base]}"
     # Initialize sub-query construction
     tdfs4ds.logger.info("Generating the sub-queries for feature retrieval.")
     sub_queries = []
@@ -1014,21 +1036,52 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
     # Construct sub-queries for each feature
     for k, v in list_features.items():
         for feature_id, feature_version, feature_name in v:
-            txt_where = f"(FEATURE_ID = {feature_id} AND FEATURE_VERSION='{feature_version}')"
-            feature_str = ',B1.FEATURE_VALUE AS ' + feature_name
-            sub_queries.append(
-                {
-                    'feature_name': feature_name,
-                    'query': f"""
-                    SEQUENCED VALIDTIME
-                    SELECT
-                       {txt_entity}
-                      {feature_str}
-                    FROM {k} B1
-                    WHERE {txt_where}
-                    """
-                }
-            )
+            # Multiple processes: list of dicts
+            if isinstance(feature_version, list):
+                for item in feature_version:
+                    process_id = item.get("process_id")
+                    process_view_name = item.get("process_view_name") or "PROCESS"
+                    base_alias = _sanitize_identifier(f"{feature_name}_{process_view_name}")
+                    alias = _unique_alias(base_alias)
+                    txt_where = f"(FEATURE_ID = {feature_id} AND FEATURE_VERSION='{process_id}')"
+                    feature_str = ',B1.FEATURE_VALUE AS ' + alias
+                    sub_queries.append(
+                        {
+                            'feature_name': alias,
+                            'query': f"""
+                            SEQUENCED VALIDTIME
+                            SELECT
+                               {txt_entity}
+                              {feature_str}
+                            FROM {k} B1
+                            WHERE {txt_where}
+                            """
+                        }
+                    )
+            # Single UUID
+            else:
+                base_alias = _sanitize_identifier(feature_name)
+                alias = _unique_alias(base_alias)
+                txt_where = f"(FEATURE_ID = {feature_id} AND FEATURE_VERSION='{feature_version}')"
+                feature_str = ',B1.FEATURE_VALUE AS ' + alias
+                sub_queries.append(
+                    {
+                        'feature_name': alias,
+                        'query': f"""
+                        SEQUENCED VALIDTIME
+                        SELECT
+                           {txt_entity}
+                          {feature_str}
+                        FROM {k} B1
+                        WHERE {txt_where}
+                        """
+                    }
+                )
     # Handle case where no features are available
     if len(sub_queries) == 0:
@@ -1102,6 +1155,7 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
         return tdml.DataFrame.from_table(tdml.in_schema(schema_name, view_name))
 def build_dataset_opt(entity_id, selected_features, view_name = None, schema_name=tdfs4ds.SCHEMA,
                   comment='dataset', no_temporal=False, time_manager=None, query_only=False, entity_null_substitute={},
                   other=None, time_column=None, filtermanager = None, filter_conditions = None

tdfs4ds/feature_store/feature_query_retrieval.py CHANGED Viewed

@@ -249,48 +249,49 @@ def get_list_features(entity_name, domain=None):
     return tdml.DataFrame.from_query(query)
-def get_feature_versions(entity_name, features, domain=None, latest_version_only=True, version_lag=0):
+def get_feature_versions(entity_name, features, domain=None):
     """
-    Retrieve feature versions for specified features associated with certain entities
-    from a given data domain. This function allows fetching either all versions or
-    just the latest versions of the features.
+    Retrieve version UUID(s) for the given features of an entity within a domain.
     Parameters:
-    entity_name (str or list): The name of the entity or a list of entity names
-                               for which feature versions are to be fetched.
-    features (list): A list of features for which versions are required.
-    domain (str, optional): The data domain to filter the feature versions.
-                            Defaults to None, where a predefined domain is used.
-    latest_version_only (bool, optional): Flag to fetch only the latest version
-                                          of each feature. Defaults to True.
-    version_lag (int, optional): The number of versions to lag behind the latest.
-                                 Only effective if latest_version_only is True. Defaults to 0.
+    - entity_name (str): The entity name to which the features belong.
+    - features (str | list[str]): Feature name or list of feature names.
+    - domain (str, optional): Data domain to filter on. If None, defaults to tdfs4ds.DATA_DOMAIN.
     Returns:
-    dict: A dictionary with feature names as keys and their corresponding versions as values.
+    - dict[str, str | list[dict]]: Maps each requested feature name to either:
+        - a single version UUID string if exactly one row exists, or
+        - a list of dicts if multiple rows exist; each dict has:
+            {
+              "process_id": <FEATURE_VERSION UUID>,
+              "process_view_name": <PROCESS_VIEW_NAME string>
+            }
+      If a requested feature has no entries, it will be present with value None.
+    Notes:
+    - Uses {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} as A and
+      {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME} as B (must exist) joined on PROCESS_ID.
+    - Respects tdfs4ds.DEBUG_MODE to print the generated SQL.
     """
-    # Default to a predefined data domain if none is provided
+    # Normalize inputs
+    if isinstance(features, str):
+        features = [features]
     if domain is None:
         domain = tdfs4ds.DATA_DOMAIN
+    # Basic escaping for single quotes in values used in SQL literals
+    def _esc(s: str) -> str:
+        return s.replace("'", "''")
-    # Convert the entity_name to a string if it is a list
-    if type(entity_name) == list:
-        entity_name.sort()
-        entity_name = ','.join(entity_name)
-    # Preparing the feature names for inclusion in the SQL query
-    if type(features) == list:
-        features = ["'" + f + "'" for f in features]
-    else:
-        features = "'" + features + "'"
+    features_lits = ",".join(f"'{_esc(f)}'" for f in features)
     query = f"""
         SELECT
             A.FEATURE_NAME
         ,   B.PROCESS_ID AS FEATURE_VERSION
+        ,   B.VIEW_NAME AS PROCESS_VIEW_NAME
         FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} A
         INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW_FEATURE_SPLIT} B
         ON A.DATA_DOMAIN = B.DATA_DOMAIN
@@ -298,35 +299,45 @@ def get_feature_versions(entity_name, features, domain=None, latest_version_only
         AND A.FEATURE_NAME = B.FEATURE_NAME
         WHERE A.DATA_DOMAIN = '{domain}'
         AND A.ENTITY_NAME = '{entity_name}'
-        AND A.FEATURE_NAME IN ({','.join(features)})
+        AND A.FEATURE_NAME IN ({features_lits})
     """
-    # Executing the first query and converting the results to a pandas DataFrame
-    df = tdml.DataFrame.from_query(query).to_pandas()
-    # if df is empty
-    if df.shape[0] == 0:
-        print('the features you are requesting for this entity and data domain do not exist. Here is what you requested:')
-        print('feature store database :', tdfs4ds.SCHEMA)
-        print('feature catalog        :', tdfs4ds.FEATURE_CATALOG_NAME_VIEW)
-        print('entity name            :', entity_name)
-        print('data domain            :', domain)
-        print('features               :', ','.join(features))
-        print('')
+    if tdfs4ds.DEBUG_MODE:
         print(query)
-        return
-    if tdfs4ds.DEBUG_MODE == True:
-        print(query)
+    rows = tdml.execute_sql(query).fetchall()
+    # Initialize result for all requested features
+    result = {f: None for f in features}
+    # Collect (version, view) per feature, deduplicating while preserving order
+    tmp = {f: [] for f in features}
+    seen = {f: set() for f in features}
+    for feat, version, view_name in rows:
+        key = (version, view_name)
+        if key not in seen.setdefault(feat, set()):
+            seen[feat].add(key)
+            tmp.setdefault(feat, []).append(key)
+    # Shape:
+    # - if exactly one row: return UUID string
+    # - if multiple rows: list of {"process_id": <uuid>, "process_view_name": <str>}
+    for feat in result:
+        pairs = tmp.get(feat, [])
+        if len(pairs) == 0:
+            result[feat] = None
+        elif len(pairs) == 1:
+            result[feat] = pairs[0][0]  # UUID only
+        else:
+            result[feat] = [
+                {"process_id": ver, "process_view_name": view}
+                for (ver, view) in pairs
+            ]
+    return result
-    # results in dictionary:
-    results = {row['FEATURE_NAME']: row['FEATURE_VERSION'] for i, row in df.iterrows()}
-    if tdfs4ds.DEBUG_MODE == True:
-        print('---> RESULTS <---')
-        print(results)
-    # Returning the results as a dictionary with feature names as keys and their versions as values
-    return results
 def get_entity_tables(entity_id, data_domain=None):
     """
     Retrieves a list of table names associated with a given entity ID or IDs from a feature catalog within a specific data domain.

{tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.27.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tdfs4ds
-Version: 0.2.4.26
+Version: 0.2.4.27
 Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
 Author: Denis Molin
 Requires-Python: >=3.6

{tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.27.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
 tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
 tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
 tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
-tdfs4ds/__init__.py,sha256=_UnSzqinlnbLOM4wOTxJrT1a_qTn6mRiNHz4jE6bRaI,64168
+tdfs4ds/__init__.py,sha256=sHzEWvxrBA_DBbOBJOsFuIxz0qX9MAY3zdS20gnCz_Q,66290
 tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
 tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
 tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -18,7 +18,7 @@ tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22
 tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
 tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
 tdfs4ds/feature_store/feature_data_processing.py,sha256=rvpnFrV6Tmg8C6xcSQLT_lrFYqZsdSzFXmS-4suK9qg,42847
-tdfs4ds/feature_store/feature_query_retrieval.py,sha256=zuHRZhL6-qyLpPS7mWgRy1WingSN5iibkbi53Q7jfAs,33834
+tdfs4ds/feature_store/feature_query_retrieval.py,sha256=0ZLJWtV13tjaUdYCiQvPvYWxKs0f_3LZ2HgfQzHyaW4,33705
 tdfs4ds/feature_store/feature_store_management.py,sha256=ufIBTdrnHBvGdXggavJoTVoZjOHFtH5ZiYqJr5eIBhg,54713
 tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
 tdfs4ds/process_store/process_followup.py,sha256=PvLcU7meg3ljBlPfuez3qwTVqpHHhVJxYxGqjgiHE8E,7265
@@ -32,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
 tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
 tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
 tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
-tdfs4ds-0.2.4.26.dist-info/METADATA,sha256=15eq8Z08VdFjD-GXC2cLqGvfb8OQoDRi3oPlmTyiq00,14326
-tdfs4ds-0.2.4.26.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-tdfs4ds-0.2.4.26.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
-tdfs4ds-0.2.4.26.dist-info/RECORD,,
+tdfs4ds-0.2.4.27.dist-info/METADATA,sha256=0zXOf1EjCvIPgXK3EyOtMDlF4ZB5nArvMsKcqFqknTg,14326
+tdfs4ds-0.2.4.27.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+tdfs4ds-0.2.4.27.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
+tdfs4ds-0.2.4.27.dist-info/RECORD,,

{tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.27.dist-info}/WHEEL RENAMED Viewed

File without changes

{tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.27.dist-info}/top_level.txt RENAMED Viewed

File without changes

tdfs4ds 0.2.4.26__py3-none-any.whl → 0.2.4.27__py3-none-any.whl

tdfs4ds 0.2.4.26__py3-none-any.whl → 0.2.4.27__py3-none-any.whl