tdfs4ds 0.2.4.31__py3-none-any.whl → 0.2.4.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/utils/filter_management.py +418 -133
- tdfs4ds/utils/time_management.py +502 -95
- {tdfs4ds-0.2.4.31.dist-info → tdfs4ds-0.2.4.32.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.31.dist-info → tdfs4ds-0.2.4.32.dist-info}/RECORD +7 -7
- {tdfs4ds-0.2.4.31.dist-info → tdfs4ds-0.2.4.32.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.31.dist-info → tdfs4ds-0.2.4.32.dist-info}/top_level.txt +0 -0
tdfs4ds/utils/time_management.py
CHANGED
|
@@ -1,32 +1,55 @@
|
|
|
1
1
|
import teradataml as tdml
|
|
2
2
|
import datetime
|
|
3
|
+
from tdfs4ds import logger
|
|
4
|
+
import re
|
|
3
5
|
|
|
4
6
|
import tdfs4ds
|
|
5
7
|
import numpy as np
|
|
6
8
|
import pandas as pd
|
|
7
9
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
+
|
|
11
|
+
def get_hidden_table_name(table_name: str) -> str:
    """Build the name of the hidden table that backs a public view.

    Args:
        table_name: Name of the public view (or base table).

    Returns:
        ``table_name`` followed by the ``_HIDDEN`` suffix,
        e.g. ``"MY_VIEW"`` -> ``"MY_VIEW_HIDDEN"``.
    """
    return "{}_HIDDEN".format(table_name)
|
|
21
|
+
|
|
22
|
+
|
|
10
23
|
class TimeManager:
|
|
11
24
|
"""
|
|
12
|
-
|
|
25
|
+
Manage time-step metadata for a Teradata table and a companion view.
|
|
26
|
+
|
|
27
|
+
This class maintains a hidden physical table (``<view>_HIDDEN``) that stores
|
|
28
|
+
sequential time steps (``TIME_ID``) mapped to a business date/time
|
|
29
|
+
(``BUSINESS_DATE``). It also maintains a public view (``<view>``) that
|
|
30
|
+
exposes the *current* business date/time by filtering the hidden table on
|
|
31
|
+
a specific ``TIME_ID``.
|
|
13
32
|
|
|
14
33
|
Attributes:
|
|
15
|
-
schema_name
|
|
16
|
-
table_name
|
|
17
|
-
|
|
34
|
+
schema_name: Target database/schema.
|
|
35
|
+
table_name: Hidden physical table name (``<view>_HIDDEN``).
|
|
36
|
+
view_name: Public view name (``<view>``).
|
|
37
|
+
time_id: Name of the incrementing identifier column (default: ``"time_id"``).
|
|
38
|
+
nb_time_steps: Number of time steps currently stored in the hidden table
|
|
39
|
+
(``None`` if the hidden table does not exist yet).
|
|
40
|
+
data_type: SQL data type of ``BUSINESS_DATE`` (e.g., ``DATE``,
|
|
41
|
+
``TIMESTAMP WITH TIME ZONE``), set after load/inspection.
|
|
18
42
|
"""
|
|
19
43
|
|
|
20
|
-
def __init__(self, table_name, schema_name):
|
|
21
|
-
"""
|
|
22
|
-
Initializes the TimeManager with a table name, schema name, and optionally a data type.
|
|
44
|
+
def __init__(self, table_name: str, schema_name: str) -> None:
|
|
45
|
+
"""Initialize a TimeManager for an existing or future hidden table/view.
|
|
23
46
|
|
|
24
|
-
|
|
47
|
+
On initialization, if the hidden table already exists, the instance
|
|
48
|
+
inspects it to populate ``data_type`` and ``nb_time_steps``.
|
|
25
49
|
|
|
26
50
|
Args:
|
|
27
|
-
table_name
|
|
28
|
-
schema_name
|
|
29
|
-
data_type (str, optional): Type of the date/time data. Defaults to 'DATE'.
|
|
51
|
+
table_name: Base public view name to manage (e.g., ``"MY_VIEW"``).
|
|
52
|
+
schema_name: Schema that contains/should contain the objects.
|
|
30
53
|
"""
|
|
31
54
|
self.schema_name = schema_name
|
|
32
55
|
self.table_name = get_hidden_table_name(table_name)
|
|
@@ -35,39 +58,52 @@ class TimeManager:
|
|
|
35
58
|
self.nb_time_steps = None
|
|
36
59
|
self.data_type = None
|
|
37
60
|
|
|
61
|
+
logger.debug(
|
|
62
|
+
"Initializing TimeManager for schema=%s, view=%s, table=%s",
|
|
63
|
+
self.schema_name, self.view_name, self.table_name
|
|
64
|
+
)
|
|
65
|
+
|
|
38
66
|
if self._exists():
|
|
67
|
+
logger.debug("Hidden table %s.%s exists; inspecting metadata.", self.schema_name, self.table_name)
|
|
39
68
|
df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
40
69
|
d_ = {x[0]: x[1] for x in df._td_column_names_and_types}
|
|
41
|
-
self.data_type = d_
|
|
42
|
-
self.nb_time_steps
|
|
43
|
-
f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}"
|
|
44
|
-
|
|
70
|
+
self.data_type = d_.get('BUSINESS_DATE')
|
|
71
|
+
self.nb_time_steps = tdml.execute_sql(
|
|
72
|
+
f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}"
|
|
73
|
+
).fetchall()[0][0]
|
|
74
|
+
logger.info(
|
|
75
|
+
"Detected BUSINESS_DATE data_type=%s with nb_time_steps=%s",
|
|
76
|
+
self.data_type, self.nb_time_steps
|
|
77
|
+
)
|
|
45
78
|
|
|
46
|
-
def load_time_steps(self, df, time_column):
|
|
47
|
-
"""
|
|
48
|
-
Load time steps into the table and update the view accordingly.
|
|
79
|
+
def load_time_steps(self, df: pd.DataFrame, time_column: str) -> None:
|
|
80
|
+
"""Load/replace the hidden table and (re)point the public view to step 1.
|
|
49
81
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
82
|
+
Workflow:
|
|
83
|
+
1) Build a DataFrame with sequential ``TIME_ID`` and ``BUSINESS_DATE``.
|
|
84
|
+
2) Infer SQL types and upcast to ``TIMESTAMP WITH TIME ZONE`` when
|
|
85
|
+
needed (to preserve offsets).
|
|
86
|
+
3) Drop and recreate the hidden table with inferred schema.
|
|
87
|
+
4) Append the rows.
|
|
88
|
+
5) Replace the public view to expose ``TIME_ID = 1``.
|
|
89
|
+
6) Store ``nb_time_steps``.
|
|
57
90
|
|
|
58
91
|
Args:
|
|
59
|
-
df
|
|
60
|
-
time_column
|
|
92
|
+
df: Input pandas DataFrame with a time column.
|
|
93
|
+
time_column: Name of the time column in ``df`` to use as ``BUSINESS_DATE``.
|
|
61
94
|
"""
|
|
95
|
+
logger.info("Loading time steps into %s.%s from column '%s'.",
|
|
96
|
+
self.schema_name, self.table_name, time_column)
|
|
62
97
|
|
|
63
|
-
# Step 1: Build DataFrame with
|
|
98
|
+
# Step 1: Build DataFrame with TIME_ID and BUSINESS_DATE
|
|
64
99
|
df_ = df.assign(
|
|
65
100
|
time_id=tdml.sqlalchemy.literal_column(
|
|
66
|
-
f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
|
|
101
|
+
f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
|
|
67
102
|
tdml.BIGINT()
|
|
68
103
|
),
|
|
69
104
|
BUSINESS_DATE=df[time_column]
|
|
70
105
|
)[["time_id", "BUSINESS_DATE"]]
|
|
106
|
+
logger.debug("Constructed intermediate DataFrame with TIME_ID and BUSINESS_DATE.")
|
|
71
107
|
|
|
72
108
|
# Step 2: Get SQL types and adjust BUSINESS_DATE if necessary
|
|
73
109
|
sql_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_)
|
|
@@ -75,9 +111,9 @@ class TimeManager:
|
|
|
75
111
|
|
|
76
112
|
if "TIMESTAMP" in type_business_date.upper() and "ZONE" not in type_business_date.upper():
|
|
77
113
|
new_type = f"{type_business_date} WITH TIME ZONE"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
114
|
+
logger.info(
|
|
115
|
+
"Upcasting BUSINESS_DATE from %s to %s to preserve timezone.",
|
|
116
|
+
type_business_date, new_type
|
|
81
117
|
)
|
|
82
118
|
type_business_date = new_type
|
|
83
119
|
sql_types["BUSINESS_DATE"] = new_type
|
|
@@ -89,13 +125,19 @@ class TimeManager:
|
|
|
89
125
|
)
|
|
90
126
|
|
|
91
127
|
self.data_type = type_business_date
|
|
128
|
+
logger.debug("Final BUSINESS_DATE SQL type: %s", self.data_type)
|
|
92
129
|
|
|
93
130
|
# Step 3: Drop table if it exists
|
|
94
131
|
try:
|
|
95
132
|
tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
|
|
133
|
+
logger.debug("Dropped existing table %s.%s (if existed).", self.schema_name, self.table_name)
|
|
96
134
|
except Exception as e:
|
|
135
|
+
# Not fatal; the table might not exist. Log at debug when in dev, warning otherwise.
|
|
136
|
+
msg = f"Error dropping table {self.schema_name}.{self.table_name}: {e}"
|
|
97
137
|
if tdfs4ds.DEBUG_MODE:
|
|
98
|
-
|
|
138
|
+
logger.debug(msg)
|
|
139
|
+
else:
|
|
140
|
+
logger.warning(msg)
|
|
99
141
|
|
|
100
142
|
# Step 4: Recreate table
|
|
101
143
|
ddl = ",\n".join([f"{col} {dtype}" for col, dtype in sql_types.items()])
|
|
@@ -106,6 +148,7 @@ class TimeManager:
|
|
|
106
148
|
PRIMARY INDEX (time_id)
|
|
107
149
|
"""
|
|
108
150
|
tdml.execute_sql(create_table_sql)
|
|
151
|
+
logger.info("Created table %s.%s with schema: %s", self.schema_name, self.table_name, sql_types)
|
|
109
152
|
|
|
110
153
|
# Step 5: Insert data
|
|
111
154
|
df_[list(sql_types.keys())].to_sql(
|
|
@@ -113,6 +156,7 @@ class TimeManager:
|
|
|
113
156
|
schema_name=self.schema_name,
|
|
114
157
|
if_exists="append"
|
|
115
158
|
)
|
|
159
|
+
logger.info("Inserted %s time steps into %s.%s.", len(df_), self.schema_name, self.table_name)
|
|
116
160
|
|
|
117
161
|
# Step 6: Update view
|
|
118
162
|
create_view_sql = f"""
|
|
@@ -122,40 +166,72 @@ class TimeManager:
|
|
|
122
166
|
WHERE time_id = 1
|
|
123
167
|
"""
|
|
124
168
|
tdml.execute_sql(create_view_sql)
|
|
169
|
+
logger.debug("Replaced view %s.%s to point at TIME_ID=1.", self.schema_name, self.view_name)
|
|
125
170
|
|
|
126
171
|
# Step 7: Store number of time steps
|
|
127
172
|
result = tdml.execute_sql(
|
|
128
173
|
f"SELECT MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}"
|
|
129
174
|
).fetchall()
|
|
130
175
|
self.nb_time_steps = result[0][0]
|
|
176
|
+
logger.info("Time steps loaded. nb_time_steps=%s", self.nb_time_steps)
|
|
131
177
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
"""
|
|
135
|
-
Checks if the table exists in the database.
|
|
178
|
+
def _exists(self) -> bool:
    """Tell whether the hidden table is listed in ``self.schema_name``.

    Table names returned by ``tdml.db_list_tables`` are lower-cased and
    stripped of double quotes before being compared with the lower-cased
    ``self.table_name``.

    Returns:
        True when a matching table name is listed; False otherwise.
    """
    wanted = self.table_name.lower()
    listed = tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    normalized = [name.lower().replace('"', '') for name in listed]
    found = wanted in normalized
    logger.debug("Hidden table %s.%s exists? %s", self.schema_name, self.table_name, found)
    return found
|
|
140
190
|
|
|
141
|
-
|
|
142
|
-
|
|
191
|
+
def _drop(self, drop_view: bool = False, force: bool = False) -> None:
    """Remove the hidden table (ownership permitting) and, on request, the view.

    Args:
        drop_view: When True, a ``DROP VIEW`` is also issued for the public view.
        force: When True, the hidden table is dropped even if this manager
            is not flagged as its owner.

    Notes:
        - The hidden table is dropped only when ``self._owns_hidden`` is
          truthy or ``force`` is True; otherwise a warning is logged.
          A manager on which ``_owns_hidden`` was never set counts as a
          non-owner.
        - The view drop does not depend on ownership. Any error it raises
          is logged as a warning and not re-raised.
    """
    if not self._exists():
        logger.debug("Hidden table %s.%s does not exist.", self.schema_name, self.table_name)
    elif getattr(self, "_owns_hidden", False) or force:
        logger.info(
            "Dropping hidden table %s.%s (force=%s).",
            self.schema_name, self.table_name, force
        )
        tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
    else:
        logger.warning(
            "Refusing to drop hidden table %s.%s because this manager does not own it. "
            "Use force=True to override.",
            self.schema_name, self.table_name
        )

    if not drop_view:
        return

    try:
        logger.info("Dropping view %s.%s.", self.schema_name, self.view_name)
        tdml.execute_sql(f"DROP VIEW {self.schema_name}.{self.view_name}")
    except Exception as e:
        logger.warning("Error dropping view %s.%s: %s", self.schema_name, self.view_name, e)
|
|
152
228
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
229
|
+
|
|
230
|
+
def update(self, time_id: int) -> None:
|
|
231
|
+
"""Point the public view at a specific ``TIME_ID``.
|
|
156
232
|
|
|
157
233
|
Args:
|
|
158
|
-
|
|
234
|
+
time_id: The time step identifier to expose via the public view.
|
|
159
235
|
"""
|
|
160
236
|
if self._exists():
|
|
161
237
|
query = f"""
|
|
@@ -164,32 +240,41 @@ class TimeManager:
|
|
|
164
240
|
FROM {self.schema_name}.{self.table_name}
|
|
165
241
|
WHERE TIME_ID = {time_id}
|
|
166
242
|
"""
|
|
167
|
-
|
|
168
243
|
if tdfs4ds.DEBUG_MODE:
|
|
169
|
-
|
|
244
|
+
logger.debug("Executing view update:\n%s", query)
|
|
170
245
|
tdml.execute_sql(query)
|
|
246
|
+
logger.info("Updated view %s.%s to TIME_ID=%s.", self.schema_name, self.view_name, time_id)
|
|
247
|
+
else:
|
|
248
|
+
logger.warning(
|
|
249
|
+
"Cannot update view: hidden table %s.%s does not exist.",
|
|
250
|
+
self.schema_name, self.table_name
|
|
251
|
+
)
|
|
171
252
|
|
|
172
|
-
def display(self):
|
|
173
|
-
"""
|
|
174
|
-
Displays the table.
|
|
253
|
+
def display(self) -> pd.DataFrame:
    """Fetch the rows currently exposed by the public view.

    Column names are taken from a Teradata DataFrame bound to the view;
    the rows come from a ``SEL *`` on the same view.

    Returns:
        A pandas DataFrame holding the view's rows under the view's column names.
    """
    logger.debug("Reading current BUSINESS_DATE from %s.%s.", self.schema_name, self.view_name)
    view_df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name))
    column_names = view_df.columns
    rows = tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall()
    return pd.DataFrame(rows, columns=column_names)
|
|
182
265
|
|
|
183
|
-
def get_date_in_the_past(self):
|
|
184
|
-
"""
|
|
185
|
-
|
|
266
|
+
def get_date_in_the_past(self) -> str | None:
|
|
267
|
+
"""Return the earliest BUSINESS_DATE from the public view as a string.
|
|
268
|
+
|
|
269
|
+
The format includes timezone offset when applicable:
|
|
270
|
+
- ``YYYY-MM-DD HH:MM:SS±HH:MM`` if timezone info is present;
|
|
271
|
+
- otherwise ``YYYY-MM-DD HH:MM:SS``.
|
|
186
272
|
|
|
187
273
|
Returns:
|
|
188
|
-
|
|
189
|
-
('YYYY-MM-DD HH:MM:SS±HH:MM' if timezone is available, else 'YYYY-MM-DD HH:MM:SS').
|
|
274
|
+
The formatted earliest date/time string, or ``None`` if parsing fails.
|
|
190
275
|
"""
|
|
191
|
-
# Use iloc to preserve timezone awareness from pandas
|
|
192
276
|
date_obj = self.display().BUSINESS_DATE.iloc[0]
|
|
277
|
+
logger.debug("Raw earliest BUSINESS_DATE value read: %r (%s)", date_obj, type(date_obj))
|
|
193
278
|
|
|
194
279
|
if isinstance(date_obj, pd.Timestamp):
|
|
195
280
|
datetime_obj = date_obj.to_pydatetime()
|
|
@@ -200,68 +285,390 @@ class TimeManager:
|
|
|
200
285
|
elif isinstance(date_obj, np.datetime64):
|
|
201
286
|
datetime_obj = pd.to_datetime(date_obj).to_pydatetime()
|
|
202
287
|
else:
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
print('temp type', type(date_obj))
|
|
206
|
-
return
|
|
288
|
+
logger.warning("Unrecognized BUSINESS_DATE type: %s; value=%r", type(date_obj), date_obj)
|
|
289
|
+
return None
|
|
207
290
|
|
|
208
|
-
# Format with timezone offset if available
|
|
209
291
|
if datetime_obj.tzinfo is not None and datetime_obj.tzinfo.utcoffset(datetime_obj) is not None:
|
|
210
292
|
output_string = datetime_obj.isoformat(sep=' ', timespec='seconds')
|
|
211
293
|
else:
|
|
212
294
|
output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
|
|
213
295
|
|
|
296
|
+
logger.debug("Formatted earliest BUSINESS_DATE: %s", output_string)
|
|
214
297
|
return output_string
|
|
215
298
|
|
|
216
|
-
def get_list_date(self):
|
|
217
|
-
"""
|
|
218
|
-
Retrieve a list of dates from the specified table.
|
|
219
|
-
|
|
220
|
-
This function returns a DataFrame containing the dates from the table specified by the schema_name and table_name attributes of the class.
|
|
299
|
+
def get_list_date(self) -> tdml.DataFrame:
    """Expose the hidden table as a Teradata DataFrame.

    Returns:
        A Teradata DataFrame bound to ``self.schema_name``.``self.table_name``.
    """
    logger.debug("Returning Teradata DataFrame for %s.%s.", self.schema_name, self.table_name)
    hidden_table = tdml.in_schema(self.schema_name, self.table_name)
    return tdml.DataFrame(hidden_table)
|
|
226
308
|
|
|
227
|
-
def generate_timeline(self, schema_name, view_name, current_included=True):
|
|
228
|
-
"""
|
|
229
|
-
Generate a timeline view based on business dates from a hidden source view.
|
|
309
|
+
def generate_timeline(self, schema_name: str, view_name: str, current_included: bool = True) -> tdml.DataFrame:
    """(Re)create a view listing the business dates up to the current one.

    The view ``schema_name.view_name`` selects ``BUSINESS_DATE`` from
    ``self.schema_name``.``<self.view_name>_HIDDEN`` and keeps only the
    dates that are earlier than (or, optionally, equal to) the date
    exposed by the public view ``self.schema_name``.``self.view_name``.

    Args:
        schema_name: Schema in which the timeline view is replaced.
        view_name: Name of the timeline view.
        current_included: True compares with ``<=`` (current date kept);
            False compares with ``<`` (current date left out).

    Returns:
        A Teradata DataFrame bound to the timeline view.
    """
    logger.info(
        "Generating timeline view %s.%s (current_included=%s).",
        schema_name, view_name, current_included
    )

    # NOTE: the source is derived from the view name, not from self.table_name.
    hidden_source = self.view_name + '_HIDDEN'
    comparator = "<=" if current_included else "<"

    query = f"""
        REPLACE VIEW {schema_name}.{view_name} AS
        SEL BUSINESS_DATE
        FROM {self.schema_name}.{hidden_source} A
        """
    query += (
        f"WHERE BUSINESS_DATE {comparator} "
        f"(SELECT BUSINESS_DATE FROM {self.schema_name}.{self.view_name})"
    )

    tdml.execute_sql(query)
    logger.debug("Replaced timeline view with query:\n%s", query.strip())
    return tdml.DataFrame(tdml.in_schema(schema_name, view_name))
|
|
259
342
|
|
|
260
|
-
def get_current_step(self):
|
|
343
|
+
def get_current_step(self) -> int | None:
    """Look up the TIME_ID whose BUSINESS_DATE matches the public view.

    Returns:
        The ``TIME_ID`` when the lookup yields exactly one row; ``None``
        when it yields zero or several rows.
    """
    # Table and view names are deliberately left without schema qualifier,
    # as in the earlier version of this method.
    logger.debug("Fetching current TIME_ID from %s and %s.", self.table_name, self.view_name)
    lookup_sql = (
        f"SELECT TIME_ID FROM {self.table_name} "
        f"WHERE BUSINESS_DATE = (SELECT BUSINESS_DATE FROM {self.view_name})"
    )
    rows = tdml.execute_sql(lookup_sql).fetchall()

    if len(rows) != 1:
        logger.warning("Could not resolve a unique current TIME_ID (rows returned: %s).", len(rows))
        return None

    logger.info("Current TIME_ID resolved to %s.", rows[0][0])
    return rows[0][0]
|
|
362
|
+
|
|
363
|
+
def clone_timer(
    self,
    source_timemanager,
    time_id_to_apply: int = 1,
    take_ownership: bool = False,
    clone_mode: str = "soft",
    if_exists: str = "error",
):
    """Reuse or copy the time steps of another TimeManager.

    Two modes are available:
        - ``"soft"`` (default): this manager's view is rebuilt on top of
          the source manager's hidden table; nothing is copied.
        - ``"hard"``: the source hidden table is copied into
          ``self.schema_name`` as ``<view>_HIDDEN`` and this manager is
          flagged as the owner of the copy.

    Args:
        source_timemanager (TimeManager): Manager whose hidden table is cloned.
        time_id_to_apply (int, optional): TIME_ID the public view is
            pointed at. Default: 1.
        take_ownership (bool, optional): Soft clone only: value stored in
            the ownership flag of this manager. Default: False.
        clone_mode (str, optional): ``"soft"`` or ``"hard"``. Default: ``"soft"``.
        if_exists (str, optional): Hard clone only: what to do when the
            destination hidden table is already listed:
            - ``"error"`` (default): raise
            - ``"replace"``: drop it and copy again
            - ``"skip"``: keep it as is

    Returns:
        TimeManager: self

    Raises:
        ValueError: Invalid ``clone_mode``/``if_exists``, or the source
            hidden table is not listed in the source schema.
        RuntimeError: Destination table exists and ``if_exists="error"``.
    """
    if clone_mode not in ("soft", "hard"):
        raise ValueError("clone_mode must be 'soft' or 'hard'")
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    src_schema = source_timemanager.schema_name
    src_hidden = source_timemanager.table_name

    log_fields = {
        "mode": clone_mode,
        "source": f"{src_schema}.{src_hidden}",
        "target_view": f"{self.schema_name}.{self.view_name}",
    }
    logger.info("Cloning timer", extra=log_fields)

    # The source hidden table must be listed in its schema.
    source_names = [
        name.lower() for name in tdml.db_list_tables(schema_name=src_schema).TableName.values
    ]
    if src_hidden.lower() not in source_names:
        raise ValueError(f"Source hidden timer table {src_schema}.{src_hidden} does not exist.")

    if clone_mode == "hard":
        # Hard clone: the copy lives in this manager's schema.
        self.table_name = get_hidden_table_name(self.view_name)
        dest_names = [
            name.lower() for name in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
        ]
        already_present = self.table_name.lower() in dest_names

        if already_present and if_exists == "error":
            raise RuntimeError(f"Target table {self.schema_name}.{self.table_name} already exists.")
        if already_present and if_exists == "replace":
            logger.warning("Replacing existing table %s.%s", self.schema_name, self.table_name)
            tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
        elif already_present:
            # Only "skip" is left after the validation above.
            logger.info("Skipping clone, using existing %s.%s", self.schema_name, self.table_name)

        if not already_present or if_exists == "replace":
            logger.info("Creating cloned table %s.%s", self.schema_name, self.table_name)
            create_sql = f"""
                CREATE TABLE {self.schema_name}.{self.table_name} AS
                (SELECT * FROM {src_schema}.{src_hidden})
                WITH DATA
            """
            tdml.execute_sql(create_sql)

        self._owns_hidden = True
        target_schema = self.schema_name
    else:
        # Soft clone: the view reads straight from the source hidden table.
        logger.info("Soft clone: linking view to source hidden table")
        self.table_name = src_hidden
        self._owns_hidden = bool(take_ownership)
        target_schema = src_schema  # view will select from the source schema

    # Refresh cached metadata from the table the view will read.
    df_meta = tdml.DataFrame(tdml.in_schema(target_schema, self.table_name))
    try:
        column_types = dict(df_meta._td_column_names_and_types)
        self.data_type = column_types.get("BUSINESS_DATE")
    except Exception:
        self.data_type = None

    max_rows = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {target_schema}.{self.table_name}"
    ).fetchall()
    self.nb_time_steps = max_rows[0][0]

    # Point the public view at the requested step.
    view_sql = f"""
        REPLACE VIEW {self.schema_name}.{self.view_name} AS
        SELECT BUSINESS_DATE
        FROM {target_schema}.{self.table_name}
        WHERE TIME_ID = {int(time_id_to_apply)}
    """
    tdml.execute_sql(view_sql)

    logger.info(
        "Timer clone complete → Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        time_id_to_apply, self.nb_time_steps, self.data_type
    )
    return self
|
|
479
|
+
|
|
480
|
+
def take_ownership(
    self,
    create_copy: bool = True,
    if_exists: str = "error",
) -> "TimeManager":
    """Promote this manager to OWN the hidden table.

    Two modes:
        - create_copy=True (default): copy the hidden table currently
          referenced by this manager into ``self.schema_name`` as
          ``<view>_HIDDEN``, repoint the view to the copy, and set ownership.
        - create_copy=False: only flag the manager as owner when
          ``self.table_name`` already equals ``<view>_HIDDEN``; otherwise
          log a warning and leave everything untouched.

    The TIME_ID currently exposed by the view is preserved across the
    repointing when it can be resolved; otherwise TIME_ID 1 is used.

    Args:
        create_copy: If True, copy data into this schema and repoint the view.
        if_exists: Behavior when the destination ``<view>_HIDDEN`` already exists:
            - "error" (default): raise
            - "replace": drop & recreate. If the destination *is* the table
              this manager currently reads from, it is kept as is instead:
              dropping it first would destroy the only copy of the data.
            - "skip": reuse existing

    Returns:
        TimeManager: self

    Raises:
        ValueError: If ``if_exists`` is not one of the accepted values.
        RuntimeError: If the destination exists and ``if_exists="error"``.
    """
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    # Figure out current active TIME_ID to preserve selection after repointing
    try:
        current_time_id = self.get_current_step()
    except Exception as exc:
        # Best effort only: fall back to the first step below.
        logger.debug("Could not resolve the current TIME_ID (%s); defaulting to 1.", exc)
        current_time_id = None
    if current_time_id is None:
        current_time_id = 1

    dest_table = get_hidden_table_name(self.view_name)

    if not create_copy:
        # Only mark as owned if we already match <schema, view_HIDDEN>.
        # NOTE(review): only the table *name* is compared; the schema the
        # table actually lives in is not tracked by this manager.
        if self.schema_name and self.table_name == dest_table:
            logger.info(
                "Marking existing hidden table %s.%s as owned (no copy).",
                self.schema_name, self.table_name
            )
            self._owns_hidden = True
            return self
        logger.warning(
            "Cannot take ownership without copying: current table is %s (expected %s). "
            "Re-run with create_copy=True to copy into %s.%s.",
            self.table_name, dest_table, self.schema_name, dest_table
        )
        return self

    # We will copy data into <self.schema_name>.<view>_HIDDEN
    dest_exists = [
        t.lower() for t in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    ]

    # The manager records no schema other than self.schema_name, so the
    # current hidden table is read from there (same resolution as before).
    src_schema = self.schema_name
    src_table = self.table_name
    source_is_destination = src_table.lower() == dest_table.lower()

    need_create = True

    if dest_table.lower() in dest_exists:
        if if_exists == "error":
            raise RuntimeError(f"Destination table {self.schema_name}.{dest_table} already exists.")
        elif if_exists == "replace":
            if source_is_destination:
                # Dropping the destination would drop the very table the
                # copy is read from; keep it and simply take ownership.
                logger.warning(
                    "Destination %s.%s is the table currently in use; keeping it instead of replacing.",
                    self.schema_name, dest_table
                )
                need_create = False
            else:
                logger.warning("Replacing existing table %s.%s", self.schema_name, dest_table)
                tdml.db_drop_table(schema_name=self.schema_name, table_name=dest_table)
        elif if_exists == "skip":
            logger.info("Reusing existing destination table %s.%s", self.schema_name, dest_table)
            need_create = False

    if need_create:
        logger.info(
            "Creating owned copy %s.%s from current source %s.%s",
            self.schema_name, dest_table, src_schema, src_table
        )
        create_sql = f"""
            CREATE TABLE {self.schema_name}.{dest_table} AS
            (SELECT * FROM {src_schema}.{src_table})
            WITH DATA
        """
        tdml.execute_sql(create_sql)

    # Repoint this manager to the new owned table and rebuild the view
    self.table_name = dest_table
    self._owns_hidden = True

    view_sql = f"""
        REPLACE VIEW {self.schema_name}.{self.view_name} AS
        SELECT BUSINESS_DATE
        FROM {self.schema_name}.{self.table_name}
        WHERE TIME_ID = {int(current_time_id)}
    """
    tdml.execute_sql(view_sql)

    # Refresh metadata (data type is best effort; the previous value is kept on failure)
    try:
        df_meta = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
        dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
        self.data_type = dtypes.get("BUSINESS_DATE")
    except Exception as exc:
        logger.debug(
            "Could not refresh BUSINESS_DATE data type for %s.%s: %s",
            self.schema_name, self.table_name, exc
        )
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    logger.info(
        "Ownership taken for %s.%s. Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        self.schema_name, self.table_name, current_time_id, self.nb_time_steps, self.data_type
    )
    return self
|
|
598
|
+
|
|
599
|
+
def get_current_timeid(self) -> int:
    """
    Extract the currently active TIME_ID from the public view's DDL.

    The DDL text is obtained from ``tdfs4ds.utils.lineage.get_ddl`` and
    searched for the first ``WHERE TIME_ID = <digits>`` clause
    (case-insensitive, flexible whitespace).

    Returns:
        int: TIME_ID parsed from the view definition.

    Raises:
        ValueError: If the TIME_ID cannot be parsed from the DDL.
    """
    logger.debug("Reading view DDL to extract current TIME_ID")
    txt = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)

    # Look for "WHERE TIME_ID = <number>" (allow whitespace/case variations)
    m = re.search(r"WHERE\s+TIME_ID\s*=\s*(\d+)", txt, flags=re.IGNORECASE)
    if not m:
        # logger.error, not logger.exception: no exception is being handled
        # here, so there is no traceback to attach to the record.
        logger.error("Failed to parse TIME_ID from view DDL")
        raise ValueError("Unable to parse current TIME_ID from view DDL.")
    current = int(m.group(1))
    logger.info("Current TIME_ID extracted", extra={"time_id": current})
    return current
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def print_view_ddl(self) -> None:
    """
    Write the public view's definition (DDL) to the log at INFO level.
    """
    view_definition = tdfs4ds.utils.lineage.get_ddl(
        schema_name=self.schema_name,
        view_name=self.view_name,
    )
    logger.info("View DDL:\n%s", view_definition)
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def prune_time(self, time_id: int | None = None):
    """
    Remove all time steps with TIME_ID lower than `time_id` and renumber remaining ones.

    If `time_id` is omitted, the method uses the current TIME_ID from the view.
    After pruning, TIME_ID values are shifted so the smallest remaining id becomes 1,
    and the public view is repointed to TIME_ID=1.

    The shift is computed from the smallest TIME_ID actually left in the
    table rather than from `time_id`, so the "smallest id becomes 1"
    guarantee also holds when `time_id` is below 1 or does not coincide
    with an existing step. When the ids are contiguous and `time_id` is
    an existing step, the result is the same as subtracting `time_id - 1`.

    Args:
        time_id (int, optional): Threshold id; rows with TIME_ID < time_id are deleted.

    Returns:
        TimeManager: Self, to allow method chaining.
    """
    if time_id is None:
        time_id = self.get_current_timeid()
    threshold = int(time_id)

    logger.info("Pruning time steps", extra={"threshold_time_id": time_id})

    delete_sql = f"""
        DELETE {self.schema_name}.{self.table_name}
        WHERE TIME_ID < {threshold}
    """
    logger.debug("Executing prune delete", extra={"sql": delete_sql})
    tdml.execute_sql(delete_sql)

    # Renumber from what is really left in the table.
    smallest = tdml.execute_sql(
        f"SEL MIN(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    if smallest is None:
        logger.warning(
            "No time steps remain in %s.%s after pruning below TIME_ID=%s.",
            self.schema_name, self.table_name, threshold
        )
    else:
        offset = int(smallest) - 1
        if offset != 0:
            # Parenthesized so a negative offset cannot form a "--" SQL comment.
            update_sql = f"""
                UPDATE {self.schema_name}.{self.table_name}
                SET TIME_ID = TIME_ID - ({offset})
            """
            logger.debug("Executing prune renumber", extra={"sql": update_sql})
            tdml.execute_sql(update_sql)

    # Refresh metadata and repoint view to TIME_ID=1
    self.update(1)
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    logger.info(
        "Prune complete; active TIME_ID set to 1; nb_time_steps=%s",
        self.nb_time_steps
    )
    return self
|