tdfs4ds-0.2.4.12-py3-none-any.whl → tdfs4ds-0.2.4.14-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = '0.2.4.12'
+ __version__ = '0.2.4.14'
  import logging
  # Setup the logger
  logging.basicConfig(
@@ -27,8 +27,8 @@ FOLLOW_UP_NAME = 'FS_FOLLOW_UP'
  DATA_DISTRIBUTION_TEMPORAL = False
  FILTER_MANAGER_NAME = 'FS_FILTER_MANAGER'

- END_PERIOD = 'UNTIL_CHANGED' #'9999-01-01 00:00:00'
- FEATURE_STORE_TIME = None #'9999-01-01 00:00:00'
+ END_PERIOD = 'UNTIL_CHANGED' #'9999-01-01 00:00:00+00:00'
+ FEATURE_STORE_TIME = None #'9999-01-01 00:00:00+00:00'
  FEATURE_VERSION_DEFAULT = 'dev.0.0'
  DISPLAY_LOGS = True
  DEBUG_MODE = False
@@ -329,6 +329,14 @@ def prepare_feature_ingestion(df, entity_id, feature_names, feature_versions=Non
  nb_duplicates = tdml.execute_sql(query_test_unicity).fetchall()[0][0]
  if nb_duplicates is not None and nb_duplicates > 0:
  tdfs4ds.logger.error(f"The process generates {nb_duplicates} duplicates")
+ query_test_unicity = f"""
+ SELECT TOP 3
+ {output_columns_unicity}
+ , count(*) as n
+ FROM {_get_database_username()}.{volatile_table_name}
+ GROUP BY {output_columns_unicity}
+ HAVING n > 1
+ """
  raise ValueError("Invalid process: the process generates duplicates.")
  #tdfs4ds.logger.info(f"No duplicate found.")
  except Exception as e:
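For context, the duplicate check in prepare_feature_ingestion above is the standard GROUP BY / HAVING pattern for surfacing non-unique key combinations. A minimal standalone sketch of that pattern, assuming an active teradataml connection; the table and column names below are hypothetical, not taken from the package:

    import teradataml as tdml  # assumes tdml.create_context(...) has already been called

    # Hypothetical staging table and key columns, for illustration only.
    table_name = "MY_DB.MY_STAGING_TABLE"
    key_columns = "ENTITY_ID, FEATURE_ID, FEATURE_VERSION"

    # Count duplicated key combinations.
    count_query = f"""
    SELECT COUNT(*) FROM (
        SELECT {key_columns}, COUNT(*) AS n
        FROM {table_name}
        GROUP BY {key_columns}
        HAVING COUNT(*) > 1
    ) AS duplicates
    """
    nb_duplicates = tdml.execute_sql(count_query).fetchall()[0][0]

    if nb_duplicates and nb_duplicates > 0:
        # List a few offending key combinations to help debugging, as the new code above does.
        sample_query = f"""
        SELECT TOP 3 {key_columns}, COUNT(*) AS n
        FROM {table_name}
        GROUP BY {key_columns}
        HAVING COUNT(*) > 1
        """
        print(tdml.execute_sql(sample_query).fetchall())
        raise ValueError("Invalid process: the process generates duplicates.")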
tdfs4ds/utils/info.py CHANGED
@@ -137,19 +137,11 @@ def extract_partition_content(partitioning):
  Returns:
  str: The content within the parentheses after 'PARTITION BY', or None if no match is found.
  """
- # First extraction: Get the content within parentheses after 'PARTITION BY'
- pattern = r'PARTITION\s+BY\s*\(\s*(.*?)\s*\)'
+ pattern = r'PARTITION\s+BY\s*\((.*)\)' # Matches content within outer parentheses after PARTITION BY
  match = re.search(pattern, partitioning, re.DOTALL)

  if match:
- result = match.group(1)
- # Second extraction: Get the content within the inner parentheses
- inner_pattern = r'\((.*)\)'
- inner_match = re.search(inner_pattern, result, re.DOTALL)
- if inner_match:
- return inner_match.group(1)
- else:
- return result
+ return match.group(1).strip()
  else:
  return None

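The simplified pattern above is greedy, so it captures everything up to the last closing parenthesis and therefore keeps nested parentheses such as RANGE_N(...) intact in one pass. A small illustrative sketch; the DDL fragment is made up for the example:

    import re

    # Hypothetical PARTITION BY clause as it might appear in extracted DDL text.
    partitioning = """PARTITION BY (
        RANGE_N(BUSINESS_DATE BETWEEN DATE '2020-01-01' AND DATE '2030-12-31' EACH INTERVAL '1' DAY)
    )"""

    pattern = r'PARTITION\s+BY\s*\((.*)\)'  # greedy match up to the last ')'
    match = re.search(pattern, partitioning, re.DOTALL)
    print(match.group(1).strip() if match else None)
    # -> RANGE_N(BUSINESS_DATE BETWEEN DATE '2020-01-01' AND DATE '2030-12-31' EACH INTERVAL '1' DAY)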
tdfs4ds/utils/time_management.py CHANGED
@@ -3,6 +3,7 @@ import datetime

  import tdfs4ds
  import numpy as np
+ import pandas as pd

  def get_hidden_table_name(table_name):
  return table_name + '_HIDDEN'
@@ -58,14 +59,20 @@ class TimeManager:
  'BUSINESS_DATE' : df[time_column]
  })[['time_id','BUSINESS_DATE']]

+ type_BUSINESS_DATE = tdfs4ds.utils.info.get_feature_types_sql_format(df_)['BUSINESS_DATE']
+ if 'TIMESTAMP' in type_BUSINESS_DATE.upper() and 'ZONE' not in type_BUSINESS_DATE.upper():
+ print(f"data type of the time colum has been modified from {type_BUSINESS_DATE} to {type_BUSINESS_DATE + ' WITH TIME ZONE'}")
+ type_BUSINESS_DATE = type_BUSINESS_DATE + ' WITH TIME ZONE'
+ df_ = df_.assign(type_BUSINESS_DATE = tdml.sqlalchemy.literal_column(f"CAST(BUSINESS_DATE AS {type_BUSINESS_DATE})"))
+
  d_ = {x[0]: x[1] for x in df_._td_column_names_and_types}
- self.data_type = d_['BUSINESS_DATE']
+ self.data_type = type_BUSINESS_DATE #d_['BUSINESS_DATE']

  df_.to_sql(
  table_name = self.table_name,
  schema_name = self.schema_name,
  if_exists = 'replace',
- primary_index = ['time_id']
+ primary_index = ['time_id'],
  )

  query = f"""
@@ -127,36 +134,40 @@ class TimeManager:
  Returns:
  DataFrame: The table data as a DataFrame.
  """
- return tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name))
+
+ cols = tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name)).columns
+ return pd.DataFrame(tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall(), columns=cols)

  def get_date_in_the_past(self):
  """
  Retrieves the earliest date and time value from the table.

  Returns:
- str: The earliest date and time value as a formatted string ('YYYY-MM-DD HH:MM:SS').
+ str: The earliest date and time value as a formatted string
+ ('YYYY-MM-DD HH:MM:SS±HH:MM' if timezone is available, else 'YYYY-MM-DD HH:MM:SS').
  """
- # '9999-01-01 00:00:00'
- date_obj = self.display().to_pandas().reset_index().BUSINESS_DATE.values[0]
+ # Use iloc to preserve timezone awareness from pandas
+ date_obj = self.display().BUSINESS_DATE.iloc[0]

- if isinstance(date_obj, datetime.datetime):
- # print("temp is a datetime.datetime object")
+ if isinstance(date_obj, pd.Timestamp):
+ datetime_obj = date_obj.to_pydatetime()
+ elif isinstance(date_obj, datetime.datetime):
  datetime_obj = date_obj
  elif isinstance(date_obj, datetime.date):
- # print("temp is a datetime.date object")
- # Convert date object to a datetime object at midnight (00:00:00)
  datetime_obj = datetime.datetime.combine(date_obj, datetime.time.min)
  elif isinstance(date_obj, np.datetime64):
- # Case when the object is a numpy.datetime64, convert it to datetime
- datetime_obj = date_obj.astype('datetime64[ms]').astype(datetime.datetime)
+ datetime_obj = pd.to_datetime(date_obj).to_pydatetime()
  else:
- print("temp is neither a datetime.date nor a datetime.datetime object")
+ print("temp is of unrecognized type")
  print('temp', date_obj)
  print('temp type', type(date_obj))
  return

- # Convert datetime object to string
- output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
+ # Format with timezone offset if available
+ if datetime_obj.tzinfo is not None and datetime_obj.tzinfo.utcoffset(datetime_obj) is not None:
+ output_string = datetime_obj.isoformat(sep=' ', timespec='seconds')
+ else:
+ output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")

  return output_string

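The tz-aware formatting branch added to get_date_in_the_past above can be exercised with plain Python and pandas objects. A minimal sketch reproducing only that formatting step; the timestamps are made-up values:

    import datetime
    import pandas as pd

    # A tz-aware value, as returned when BUSINESS_DATE is TIMESTAMP WITH TIME ZONE.
    aware = pd.Timestamp("2024-01-01 00:00:00+00:00").to_pydatetime()
    # A naive value, as returned for a plain TIMESTAMP column.
    naive = datetime.datetime(2024, 1, 1, 0, 0, 0)

    def fmt(dt):
        # Include the offset only when one is actually known.
        if dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None:
            return dt.isoformat(sep=' ', timespec='seconds')
        return dt.strftime("%Y-%m-%d %H:%M:%S")

    print(fmt(aware))  # 2024-01-01 00:00:00+00:00
    print(fmt(naive))  # 2024-01-01 00:00:00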
{tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.14.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tdfs4ds
- Version: 0.2.4.12
+ Version: 0.2.4.14
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
  Author: Denis Molin
  Requires-Python: >=3.6
{tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.14.dist-info}/RECORD RENAMED
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
- tdfs4ds/__init__.py,sha256=wIYx8hEc5LsvT8fmzHOM80aOlZXKjcdzMPyRYF1_D6U,65845
+ tdfs4ds/__init__.py,sha256=GfLqTgZOCeV0h2_ceuOsqeVQmb1kN4X9RTlkeTZ-86w,65857
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -14,7 +14,7 @@ tdfs4ds/dataset/dataset.py,sha256=caiQwT-RtdPe5MDtsynWMm1n12OxftgMp7_BR9SCHKw,53
  tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
  tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
  tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
- tdfs4ds/feature_store/feature_data_processing.py,sha256=PrhwJUYsAHXVsw5ItfNaYcZTCEoXLG6VjmEHEu6m25Q,42354
+ tdfs4ds/feature_store/feature_data_processing.py,sha256=ZLr1MqSfqcHO4KuybKiiKnt9cPvQRhdrLRMpZyPsYXg,42643
  tdfs4ds/feature_store/feature_query_retrieval.py,sha256=zuHRZhL6-qyLpPS7mWgRy1WingSN5iibkbi53Q7jfAs,33834
  tdfs4ds/feature_store/feature_store_management.py,sha256=e_hBsGhtqA6vHBu2Mhy4URkYe4SFaHijXXdqqWr-3tg,56154
  tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
@@ -24,12 +24,12 @@ tdfs4ds/process_store/process_registration_management.py,sha256=F8VlBoL-de98KnkM
  tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIqPVbHpuIyyvsaNrek6b1iPk8avJMI,16088
  tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
  tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
- tdfs4ds/utils/info.py,sha256=Szc4IhbzfFp6MQlZLUb5Jk0yORD9OzpvsmLscg0YdaI,12364
+ tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
  tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,28670
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
- tdfs4ds/utils/time_management.py,sha256=_jbwdyZH4Yr3VzbUrq6X93FpXDCDEdH0iv56vX7j8mA,8446
+ tdfs4ds/utils/time_management.py,sha256=rVxtIXcFtQih2UabAtos4DK-j9MPqzYVieIz_SvySZE,9241
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
- tdfs4ds-0.2.4.12.dist-info/METADATA,sha256=jFv6iNmJNUB3vgsPn0UBsHPqP6I6mL9GiG35XVRVtE8,11945
- tdfs4ds-0.2.4.12.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
- tdfs4ds-0.2.4.12.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
- tdfs4ds-0.2.4.12.dist-info/RECORD,,
+ tdfs4ds-0.2.4.14.dist-info/METADATA,sha256=uaiMXMjkNR7aA6yZitLJEYAzdigDKhF5ozzm7bJPrlA,11945
+ tdfs4ds-0.2.4.14.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ tdfs4ds-0.2.4.14.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
+ tdfs4ds-0.2.4.14.dist-info/RECORD,,