tdfs4ds 0.2.4.17__py3-none-any.whl → 0.2.4.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/utils/time_management.py +74 -32
- {tdfs4ds-0.2.4.17.dist-info → tdfs4ds-0.2.4.18.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.17.dist-info → tdfs4ds-0.2.4.18.dist-info}/RECORD +6 -6
- {tdfs4ds-0.2.4.17.dist-info → tdfs4ds-0.2.4.18.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.17.dist-info → tdfs4ds-0.2.4.18.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
tdfs4ds/utils/time_management.py
CHANGED
|
@@ -45,47 +45,89 @@ class TimeManager:
|
|
|
45
45
|
|
|
46
46
|
def load_time_steps(self, df, time_column):
|
|
47
47
|
"""
|
|
48
|
-
|
|
48
|
+
Load time steps into the table and update the view accordingly.
|
|
49
49
|
|
|
50
|
-
This method
|
|
50
|
+
This method:
|
|
51
|
+
1. Creates a new DataFrame with a sequential time_id and BUSINESS_DATE.
|
|
52
|
+
2. Ensures BUSINESS_DATE has the correct SQL data type.
|
|
53
|
+
3. Drops and recreates the target table with the appropriate schema.
|
|
54
|
+
4. Inserts the new data into the table.
|
|
55
|
+
5. Updates the view to reference the first time step.
|
|
56
|
+
6. Stores the number of time steps in `self.nb_time_steps`.
|
|
51
57
|
|
|
52
58
|
Args:
|
|
53
|
-
df (DataFrame): The
|
|
59
|
+
df (pd.DataFrame): The input DataFrame containing time data.
|
|
60
|
+
time_column (str): The column name representing time.
|
|
54
61
|
"""
|
|
55
62
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
63
|
+
# Step 1: Build DataFrame with time_id and BUSINESS_DATE
|
|
64
|
+
df_ = df.assign(
|
|
65
|
+
time_id=tdml.sqlalchemy.literal_column(
|
|
66
|
+
f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
|
|
67
|
+
tdml.BIGINT()
|
|
68
|
+
),
|
|
69
|
+
BUSINESS_DATE=df[time_column]
|
|
70
|
+
)[["time_id", "BUSINESS_DATE"]]
|
|
71
|
+
|
|
72
|
+
# Step 2: Get SQL types and adjust BUSINESS_DATE if necessary
|
|
73
|
+
sql_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_)
|
|
74
|
+
type_business_date = sql_types["BUSINESS_DATE"]
|
|
75
|
+
|
|
76
|
+
if "TIMESTAMP" in type_business_date.upper() and "ZONE" not in type_business_date.upper():
|
|
77
|
+
new_type = f"{type_business_date} WITH TIME ZONE"
|
|
78
|
+
print(
|
|
79
|
+
f"Data type of the time column modified from {type_business_date} "
|
|
80
|
+
f"to {new_type}"
|
|
81
|
+
)
|
|
82
|
+
type_business_date = new_type
|
|
83
|
+
sql_types["BUSINESS_DATE"] = new_type
|
|
84
|
+
|
|
85
|
+
df_ = df_.assign(
|
|
86
|
+
BUSINESS_DATE=tdml.sqlalchemy.literal_column(
|
|
87
|
+
f"CAST(BUSINESS_DATE AS {new_type})"
|
|
88
|
+
)
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
self.data_type = type_business_date
|
|
92
|
+
|
|
93
|
+
# Step 3: Drop table if it exists
|
|
94
|
+
try:
|
|
95
|
+
tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
|
|
96
|
+
except Exception as e:
|
|
97
|
+
if tdfs4ds.DEBUG_MODE:
|
|
98
|
+
print(f"Error dropping table {self.schema_name}.{self.table_name}: {e}")
|
|
99
|
+
|
|
100
|
+
# Step 4: Recreate table
|
|
101
|
+
ddl = ",\n".join([f"{col} {dtype}" for col, dtype in sql_types.items()])
|
|
102
|
+
create_table_sql = f"""
|
|
103
|
+
CREATE TABLE {self.schema_name}.{self.table_name} (
|
|
104
|
+
{ddl}
|
|
105
|
+
)
|
|
106
|
+
PRIMARY INDEX (time_id)
|
|
107
|
+
"""
|
|
108
|
+
tdml.execute_sql(create_table_sql)
|
|
70
109
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
110
|
+
# Step 5: Insert data
|
|
111
|
+
df_[list(sql_types.keys())].to_sql(
|
|
112
|
+
table_name=self.table_name,
|
|
113
|
+
schema_name=self.schema_name,
|
|
114
|
+
if_exists="append"
|
|
76
115
|
)
|
|
77
116
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
tdml.execute_sql(
|
|
117
|
+
# Step 6: Update view
|
|
118
|
+
create_view_sql = f"""
|
|
119
|
+
REPLACE VIEW {self.schema_name}.{self.view_name} AS
|
|
120
|
+
SELECT BUSINESS_DATE
|
|
121
|
+
FROM {self.schema_name}.{self.table_name}
|
|
122
|
+
WHERE time_id = 1
|
|
123
|
+
"""
|
|
124
|
+
tdml.execute_sql(create_view_sql)
|
|
86
125
|
|
|
87
|
-
|
|
88
|
-
|
|
126
|
+
# Step 7: Store number of time steps
|
|
127
|
+
result = tdml.execute_sql(
|
|
128
|
+
f"SELECT MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}"
|
|
129
|
+
).fetchall()
|
|
130
|
+
self.nb_time_steps = result[0][0]
|
|
89
131
|
|
|
90
132
|
|
|
91
133
|
def _exists(self):
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=pDeTVUVWwzAj5A7GxZy6KTKJkAYuzm5NbmKwhFOFa7I,64168
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -27,9 +27,9 @@ tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH
|
|
|
27
27
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
28
28
|
tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,28670
|
|
29
29
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
30
|
-
tdfs4ds/utils/time_management.py,sha256=
|
|
30
|
+
tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
|
|
31
31
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
32
|
-
tdfs4ds-0.2.4.
|
|
33
|
-
tdfs4ds-0.2.4.17.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
-
tdfs4ds-0.2.4.17.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
-
tdfs4ds-0.2.4.17.dist-info/RECORD,,
|
|
32
|
+
tdfs4ds-0.2.4.18.dist-info/METADATA,sha256=I3XSCDdFlXlJBYpj4B5liyIU99fkAGWstaaZGvQYDdg,11944
|
|
33
|
+
tdfs4ds-0.2.4.18.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
+
tdfs4ds-0.2.4.18.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
+
tdfs4ds-0.2.4.18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|