tdfs4ds-0.2.4.12-py3-none-any.whl → tdfs4ds-0.2.4.14-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = '0.2.4.12'
+ __version__ = '0.2.4.14'
  import logging
  # Setup the logger
  logging.basicConfig(
@@ -27,8 +27,8 @@ FOLLOW_UP_NAME = 'FS_FOLLOW_UP'
  DATA_DISTRIBUTION_TEMPORAL = False
  FILTER_MANAGER_NAME = 'FS_FILTER_MANAGER'

- END_PERIOD = 'UNTIL_CHANGED' #'9999-01-01 00:00:00'
- FEATURE_STORE_TIME = None #'9999-01-01 00:00:00'
+ END_PERIOD = 'UNTIL_CHANGED' #'9999-01-01 00:00:00+00:00'
+ FEATURE_STORE_TIME = None #'9999-01-01 00:00:00+00:00'
  FEATURE_VERSION_DEFAULT = 'dev.0.0'
  DISPLAY_LOGS = True
  DEBUG_MODE = False
@@ -329,6 +329,14 @@ def prepare_feature_ingestion(df, entity_id, feature_names, feature_versions=Non
  nb_duplicates = tdml.execute_sql(query_test_unicity).fetchall()[0][0]
  if nb_duplicates is not None and nb_duplicates > 0:
  tdfs4ds.logger.error(f"The process generates {nb_duplicates} duplicates")
+ query_test_unicity = f"""
+ SELECT TOP 3
+ {output_columns_unicity}
+ , count(*) as n
+ FROM {_get_database_username()}.{volatile_table_name}
+ GROUP BY {output_columns_unicity}
+ HAVING n > 1
+ """
  raise ValueError("Invalid process: the process generates duplicates.")
  #tdfs4ds.logger.info(f"No duplicate found.")
  except Exception as e:
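For context, the duplicate check in prepare_feature_ingestion above is the standard GROUP BY / HAVING pattern for surfacing non-unique key combinations. A minimal standalone sketch of that pattern, assuming an active teradataml connection; the table and column names below are hypothetical, not taken from the package:

    import teradataml as tdml  # assumes tdml.create_context(...) has already been called

    # Hypothetical staging table and key columns, for illustration only.
    table_name = "MY_DB.MY_STAGING_TABLE"
    key_columns = "ENTITY_ID, FEATURE_ID, FEATURE_VERSION"

    # Count duplicated key combinations.
    count_query = f"""
    SELECT COUNT(*) FROM (
        SELECT {key_columns}, COUNT(*) AS n
        FROM {table_name}
        GROUP BY {key_columns}
        HAVING COUNT(*) > 1
    ) AS duplicates
    """
    nb_duplicates = tdml.execute_sql(count_query).fetchall()[0][0]

    if nb_duplicates and nb_duplicates > 0:
        # List a few offending key combinations to help debugging, as the new code above does.
        sample_query = f"""
        SELECT TOP 3 {key_columns}, COUNT(*) AS n
        FROM {table_name}
        GROUP BY {key_columns}
        HAVING COUNT(*) > 1
        """
        print(tdml.execute_sql(sample_query).fetchall())
        raise ValueError("Invalid process: the process generates duplicates.")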
tdfs4ds/utils/info.py CHANGED
@@ -137,19 +137,11 @@ def extract_partition_content(partitioning):
  Returns:
  str: The content within the parentheses after 'PARTITION BY', or None if no match is found.
  """
- # First extraction: Get the content within parentheses after 'PARTITION BY'
- pattern = r'PARTITION\s+BY\s*\(\s*(.*?)\s*\)'
+ pattern = r'PARTITION\s+BY\s*\((.*)\)' # Matches content within outer parentheses after PARTITION BY
  match = re.search(pattern, partitioning, re.DOTALL)

  if match:
- result = match.group(1)
- # Second extraction: Get the content within the inner parentheses
- inner_pattern = r'\((.*)\)'
- inner_match = re.search(inner_pattern, result, re.DOTALL)
- if inner_match:
- return inner_match.group(1)
- else:
- return result
+ return match.group(1).strip()
  else:
  return None

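The simplified pattern above is greedy, so it captures everything up to the last closing parenthesis and therefore keeps nested parentheses such as RANGE_N(...) intact in one pass. A small illustrative sketch; the DDL fragment is made up for the example:

    import re

    # Hypothetical PARTITION BY clause as it might appear in extracted DDL text.
    partitioning = """PARTITION BY (
        RANGE_N(BUSINESS_DATE BETWEEN DATE '2020-01-01' AND DATE '2030-12-31' EACH INTERVAL '1' DAY)
    )"""

    pattern = r'PARTITION\s+BY\s*\((.*)\)'  # greedy match up to the last ')'
    match = re.search(pattern, partitioning, re.DOTALL)
    print(match.group(1).strip() if match else None)
    # -> RANGE_N(BUSINESS_DATE BETWEEN DATE '2020-01-01' AND DATE '2030-12-31' EACH INTERVAL '1' DAY)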
tdfs4ds/utils/time_management.py CHANGED
@@ -3,6 +3,7 @@ import datetime

  import tdfs4ds
  import numpy as np
+ import pandas as pd

  def get_hidden_table_name(table_name):
  return table_name + '_HIDDEN'
@@ -58,14 +59,20 @@ class TimeManager:
  'BUSINESS_DATE' : df[time_column]
  })[['time_id','BUSINESS_DATE']]

+ type_BUSINESS_DATE = tdfs4ds.utils.info.get_feature_types_sql_format(df_)['BUSINESS_DATE']
+ if 'TIMESTAMP' in type_BUSINESS_DATE.upper() and 'ZONE' not in type_BUSINESS_DATE.upper():
+ print(f"data type of the time colum has been modified from {type_BUSINESS_DATE} to {type_BUSINESS_DATE + ' WITH TIME ZONE'}")
+ type_BUSINESS_DATE = type_BUSINESS_DATE + ' WITH TIME ZONE'
+ df_ = df_.assign(type_BUSINESS_DATE = tdml.sqlalchemy.literal_column(f"CAST(BUSINESS_DATE AS {type_BUSINESS_DATE})"))
+
  d_ = {x[0]: x[1] for x in df_._td_column_names_and_types}
- self.data_type = d_['BUSINESS_DATE']
+ self.data_type = type_BUSINESS_DATE #d_['BUSINESS_DATE']

  df_.to_sql(
  table_name = self.table_name,
  schema_name = self.schema_name,
  if_exists = 'replace',
- primary_index = ['time_id']
+ primary_index = ['time_id'],
  )

  query = f"""
@@ -127,36 +134,40 @@ class TimeManager:
  Returns:
  DataFrame: The table data as a DataFrame.
  """
- return tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name))
+
+ cols = tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name)).columns
+ return pd.DataFrame(tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall(), columns=cols)

  def get_date_in_the_past(self):
  """
  Retrieves the earliest date and time value from the table.

  Returns:
- str: The earliest date and time value as a formatted string ('YYYY-MM-DD HH:MM:SS').
+ str: The earliest date and time value as a formatted string
+ ('YYYY-MM-DD HH:MM:SS±HH:MM' if timezone is available, else 'YYYY-MM-DD HH:MM:SS').
  """
- # '9999-01-01 00:00:00'
- date_obj = self.display().to_pandas().reset_index().BUSINESS_DATE.values[0]
+ # Use iloc to preserve timezone awareness from pandas
+ date_obj = self.display().BUSINESS_DATE.iloc[0]

- if isinstance(date_obj, datetime.datetime):
- # print("temp is a datetime.datetime object")
+ if isinstance(date_obj, pd.Timestamp):
+ datetime_obj = date_obj.to_pydatetime()
+ elif isinstance(date_obj, datetime.datetime):
  datetime_obj = date_obj
  elif isinstance(date_obj, datetime.date):
- # print("temp is a datetime.date object")
- # Convert date object to a datetime object at midnight (00:00:00)
  datetime_obj = datetime.datetime.combine(date_obj, datetime.time.min)
  elif isinstance(date_obj, np.datetime64):
- # Case when the object is a numpy.datetime64, convert it to datetime
- datetime_obj = date_obj.astype('datetime64[ms]').astype(datetime.datetime)
+ datetime_obj = pd.to_datetime(date_obj).to_pydatetime()
  else:
- print("temp is neither a datetime.date nor a datetime.datetime object")
+ print("temp is of unrecognized type")
  print('temp', date_obj)
  print('temp type', type(date_obj))
  return

- # Convert datetime object to string
- output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
+ # Format with timezone offset if available
+ if datetime_obj.tzinfo is not None and datetime_obj.tzinfo.utcoffset(datetime_obj) is not None:
+ output_string = datetime_obj.isoformat(sep=' ', timespec='seconds')
+ else:
+ output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")

  return output_string

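The tz-aware formatting branch added to get_date_in_the_past above can be exercised with plain Python and pandas objects. A minimal sketch reproducing only that formatting step; the timestamps are made-up values:

    import datetime
    import pandas as pd

    # A tz-aware value, as returned when BUSINESS_DATE is TIMESTAMP WITH TIME ZONE.
    aware = pd.Timestamp("2024-01-01 00:00:00+00:00").to_pydatetime()
    # A naive value, as returned for a plain TIMESTAMP column.
    naive = datetime.datetime(2024, 1, 1, 0, 0, 0)

    def fmt(dt):
        # Include the offset only when one is actually known.
        if dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None:
            return dt.isoformat(sep=' ', timespec='seconds')
        return dt.strftime("%Y-%m-%d %H:%M:%S")

    print(fmt(aware))  # 2024-01-01 00:00:00+00:00
    print(fmt(naive))  # 2024-01-01 00:00:00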
{tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.14.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tdfs4ds
- Version: 0.2.4.12
+ Version: 0.2.4.14
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
  Author: Denis Molin
  Requires-Python: >=3.6
{tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.14.dist-info}/RECORD RENAMED
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
- tdfs4ds/__init__.py,sha256=wIYx8hEc5LsvT8fmzHOM80aOlZXKjcdzMPyRYF1_D6U,65845
+ tdfs4ds/__init__.py,sha256=GfLqTgZOCeV0h2_ceuOsqeVQmb1kN4X9RTlkeTZ-86w,65857
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -14,7 +14,7 @@ tdfs4ds/dataset/dataset.py,sha256=caiQwT-RtdPe5MDtsynWMm1n12OxftgMp7_BR9SCHKw,53
  tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
  tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
  tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
- tdfs4ds/feature_store/feature_data_processing.py,sha256=PrhwJUYsAHXVsw5ItfNaYcZTCEoXLG6VjmEHEu6m25Q,42354
+ tdfs4ds/feature_store/feature_data_processing.py,sha256=ZLr1MqSfqcHO4KuybKiiKnt9cPvQRhdrLRMpZyPsYXg,42643
  tdfs4ds/feature_store/feature_query_retrieval.py,sha256=zuHRZhL6-qyLpPS7mWgRy1WingSN5iibkbi53Q7jfAs,33834
  tdfs4ds/feature_store/feature_store_management.py,sha256=e_hBsGhtqA6vHBu2Mhy4URkYe4SFaHijXXdqqWr-3tg,56154
  tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
@@ -24,12 +24,12 @@ tdfs4ds/process_store/process_registration_management.py,sha256=F8VlBoL-de98KnkM
  tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIqPVbHpuIyyvsaNrek6b1iPk8avJMI,16088
  tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
  tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
- tdfs4ds/utils/info.py,sha256=Szc4IhbzfFp6MQlZLUb5Jk0yORD9OzpvsmLscg0YdaI,12364
+ tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
  tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,28670
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
- tdfs4ds/utils/time_management.py,sha256=_jbwdyZH4Yr3VzbUrq6X93FpXDCDEdH0iv56vX7j8mA,8446
+ tdfs4ds/utils/time_management.py,sha256=rVxtIXcFtQih2UabAtos4DK-j9MPqzYVieIz_SvySZE,9241
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
- tdfs4ds-0.2.4.12.dist-info/METADATA,sha256=jFv6iNmJNUB3vgsPn0UBsHPqP6I6mL9GiG35XVRVtE8,11945
- tdfs4ds-0.2.4.12.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
- tdfs4ds-0.2.4.12.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
- tdfs4ds-0.2.4.12.dist-info/RECORD,,
+ tdfs4ds-0.2.4.14.dist-info/METADATA,sha256=uaiMXMjkNR7aA6yZitLJEYAzdigDKhF5ozzm7bJPrlA,11945
+ tdfs4ds-0.2.4.14.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ tdfs4ds-0.2.4.14.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
+ tdfs4ds-0.2.4.14.dist-info/RECORD,,