tdfs4ds 0.2.4.17__py3-none-any.whl → 0.2.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.2.4.17'
1
+ __version__ = '0.2.4.18'
2
2
  import logging
3
3
  # Setup the logger
4
4
  logging.basicConfig(
@@ -45,47 +45,89 @@ class TimeManager:
45
45
 
46
46
  def load_time_steps(self, df, time_column):
47
47
  """
48
- Loads a new filter into the table and updates the view to reflect this filter.
48
+ Load time steps into the table and update the view accordingly.
49
49
 
50
- This method takes a DataFrame as input, assigns filter IDs to each row, and updates or replaces the table and view to reflect the new filter configuration.
50
+ This method:
51
+ 1. Creates a new DataFrame with a sequential time_id and BUSINESS_DATE.
52
+ 2. Ensures BUSINESS_DATE has the correct SQL data type.
53
+ 3. Drops and recreates the target table with the appropriate schema.
54
+ 4. Inserts the new data into the table.
55
+ 5. Updates the view to reference the first time step.
56
+ 6. Stores the number of time steps in `self.nb_time_steps`.
51
57
 
52
58
  Args:
53
- df (DataFrame): The data containing the new filter configuration.
59
+ df (pd.DataFrame): The input DataFrame containing time data.
60
+ time_column (str): The column name representing time.
54
61
  """
55
62
 
56
- df_ = df.assign(**{
57
- 'time_id': tdml.sqlalchemy.literal_column(
58
- f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})", tdml.BIGINT()),
59
- 'BUSINESS_DATE' : df[time_column]
60
- })[['time_id','BUSINESS_DATE']]
61
-
62
- type_BUSINESS_DATE = tdfs4ds.utils.info.get_feature_types_sql_format(df_)['BUSINESS_DATE']
63
- if 'TIMESTAMP' in type_BUSINESS_DATE.upper() and 'ZONE' not in type_BUSINESS_DATE.upper():
64
- print(f"data type of the time colum has been modified from {type_BUSINESS_DATE} to {type_BUSINESS_DATE + ' WITH TIME ZONE'}")
65
- type_BUSINESS_DATE = type_BUSINESS_DATE + ' WITH TIME ZONE'
66
- df_ = df_.assign(type_BUSINESS_DATE = tdml.sqlalchemy.literal_column(f"CAST(BUSINESS_DATE AS {type_BUSINESS_DATE})"))
67
-
68
- d_ = {x[0]: x[1] for x in df_._td_column_names_and_types}
69
- self.data_type = type_BUSINESS_DATE #d_['BUSINESS_DATE']
63
+ # Step 1: Build DataFrame with time_id and BUSINESS_DATE
64
+ df_ = df.assign(
65
+ time_id=tdml.sqlalchemy.literal_column(
66
+ f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
67
+ tdml.BIGINT()
68
+ ),
69
+ BUSINESS_DATE=df[time_column]
70
+ )[["time_id", "BUSINESS_DATE"]]
71
+
72
+ # Step 2: Get SQL types and adjust BUSINESS_DATE if necessary
73
+ sql_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_)
74
+ type_business_date = sql_types["BUSINESS_DATE"]
75
+
76
+ if "TIMESTAMP" in type_business_date.upper() and "ZONE" not in type_business_date.upper():
77
+ new_type = f"{type_business_date} WITH TIME ZONE"
78
+ print(
79
+ f"Data type of the time column modified from {type_business_date} "
80
+ f"to {new_type}"
81
+ )
82
+ type_business_date = new_type
83
+ sql_types["BUSINESS_DATE"] = new_type
84
+
85
+ df_ = df_.assign(
86
+ BUSINESS_DATE=tdml.sqlalchemy.literal_column(
87
+ f"CAST(BUSINESS_DATE AS {new_type})"
88
+ )
89
+ )
90
+
91
+ self.data_type = type_business_date
92
+
93
+ # Step 3: Drop table if it exists
94
+ try:
95
+ tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
96
+ except Exception as e:
97
+ if tdfs4ds.DEBUG_MODE:
98
+ print(f"Error dropping table {self.schema_name}.{self.table_name}: {e}")
99
+
100
+ # Step 4: Recreate table
101
+ ddl = ",\n".join([f"{col} {dtype}" for col, dtype in sql_types.items()])
102
+ create_table_sql = f"""
103
+ CREATE TABLE {self.schema_name}.{self.table_name} (
104
+ {ddl}
105
+ )
106
+ PRIMARY INDEX (time_id)
107
+ """
108
+ tdml.execute_sql(create_table_sql)
70
109
 
71
- df_.to_sql(
72
- table_name = self.table_name,
73
- schema_name = self.schema_name,
74
- if_exists = 'replace',
75
- primary_index = ['time_id'],
110
+ # Step 5: Insert data
111
+ df_[list(sql_types.keys())].to_sql(
112
+ table_name=self.table_name,
113
+ schema_name=self.schema_name,
114
+ if_exists="append"
76
115
  )
77
116
 
78
- query = f"""
79
- REPLACE VIEW {self.schema_name}.{self.view_name} AS
80
- SEL BUSINESS_DATE
81
- FROM {self.schema_name}.{self.table_name}
82
- WHERE time_id = 1
83
- """
84
-
85
- tdml.execute_sql(query)
117
+ # Step 6: Update view
118
+ create_view_sql = f"""
119
+ REPLACE VIEW {self.schema_name}.{self.view_name} AS
120
+ SELECT BUSINESS_DATE
121
+ FROM {self.schema_name}.{self.table_name}
122
+ WHERE time_id = 1
123
+ """
124
+ tdml.execute_sql(create_view_sql)
86
125
 
87
- self.nb_time_steps = tdml.execute_sql(
88
- f"SEL MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}").fetchall()[0][0]
126
+ # Step 7: Store number of time steps
127
+ result = tdml.execute_sql(
128
+ f"SELECT MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}"
129
+ ).fetchall()
130
+ self.nb_time_steps = result[0][0]
89
131
 
90
132
 
91
133
  def _exists(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tdfs4ds
3
- Version: 0.2.4.17
3
+ Version: 0.2.4.18
4
4
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
5
5
  Author: Denis Molin
6
6
  Requires-Python: >=3.6
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
2
2
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
3
3
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
4
4
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
5
- tdfs4ds/__init__.py,sha256=CU5AFwETPm__QJJhmyxIE35XgkKB8rNmJaSrA-0GgFk,64168
5
+ tdfs4ds/__init__.py,sha256=pDeTVUVWwzAj5A7GxZy6KTKJkAYuzm5NbmKwhFOFa7I,64168
6
6
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
7
7
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
8
8
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -27,9 +27,9 @@ tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH
27
27
  tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
28
28
  tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,28670
29
29
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
30
- tdfs4ds/utils/time_management.py,sha256=rVxtIXcFtQih2UabAtos4DK-j9MPqzYVieIz_SvySZE,9241
30
+ tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
31
31
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
32
- tdfs4ds-0.2.4.17.dist-info/METADATA,sha256=p_BzFpsW4I4oBIQryiPuid20dyzJbHw23_R8hrj_quQ,11944
33
- tdfs4ds-0.2.4.17.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
- tdfs4ds-0.2.4.17.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
- tdfs4ds-0.2.4.17.dist-info/RECORD,,
32
+ tdfs4ds-0.2.4.18.dist-info/METADATA,sha256=I3XSCDdFlXlJBYpj4B5liyIU99fkAGWstaaZGvQYDdg,11944
33
+ tdfs4ds-0.2.4.18.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
+ tdfs4ds-0.2.4.18.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
+ tdfs4ds-0.2.4.18.dist-info/RECORD,,