tdfs4ds 0.2.4.26__py3-none-any.whl → 0.2.4.41__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +586 -564
- tdfs4ds/feature_store/feature_data_processing.py +367 -299
- tdfs4ds/feature_store/feature_query_retrieval.py +105 -52
- tdfs4ds/feature_store/feature_store_management.py +226 -231
- tdfs4ds/process_store/process_followup.py +113 -2
- tdfs4ds/process_store/process_query_administration.py +1 -1
- tdfs4ds/process_store/process_registration_management.py +67 -55
- tdfs4ds/process_store/process_store_catalog_management.py +2 -2
- tdfs4ds/utils/filter_management.py +521 -138
- tdfs4ds/utils/query_management.py +18 -40
- tdfs4ds/utils/time_management.py +547 -97
- {tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.41.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.41.dist-info}/RECORD +15 -15
- {tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.41.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.26.dist-info → tdfs4ds-0.2.4.41.dist-info}/top_level.txt +0 -0
tdfs4ds/utils/time_management.py
CHANGED
|
@@ -1,32 +1,94 @@
|
|
|
1
1
|
import teradataml as tdml
|
|
2
2
|
import datetime
|
|
3
|
+
from tdfs4ds import logger, logger_safe
|
|
4
|
+
import re
|
|
3
5
|
|
|
4
6
|
import tdfs4ds
|
|
5
7
|
import numpy as np
|
|
6
8
|
import pandas as pd
|
|
7
9
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
def get_hidden_table_name(table_name: str) -> str:
|
|
12
|
+
"""Return the hidden table name associated with a public view name.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
table_name: Base table or view name.
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
The hidden table name (e.g., "<name>_HIDDEN").
|
|
11
19
|
"""
|
|
12
|
-
|
|
20
|
+
return f"{table_name}_HIDDEN"
|
|
13
21
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
22
|
+
|
|
23
|
+
class TimeManager:
|
|
24
|
+
"""
|
|
25
|
+
Manage versioned business time steps behind a Teradata-backed public view.
|
|
26
|
+
|
|
27
|
+
`TimeManager` stores a sequence of time “snapshots” in a hidden physical
|
|
28
|
+
table (`<view_name>_HIDDEN`) with two columns:
|
|
29
|
+
- `TIME_ID` (1..N): the step index, assigned deterministically via
|
|
30
|
+
`ROW_NUMBER()` over the input time column.
|
|
31
|
+
- `BUSINESS_DATE`: the business date/time associated with each step. When
|
|
32
|
+
loading, the SQL type is inferred and upcast to `TIMESTAMP WITH TIME ZONE`
|
|
33
|
+
when needed to preserve offsets.
|
|
34
|
+
|
|
35
|
+
A companion public view (`<view_name>`) always exposes the *current* business
|
|
36
|
+
date by filtering the hidden table on a single `TIME_ID`. Changing the
|
|
37
|
+
current step only rewrites the view definition—no data is mutated.
|
|
38
|
+
|
|
39
|
+
Key capabilities
|
|
40
|
+
- Load/replace the hidden table from a pandas DataFrame (`load_time_steps`).
|
|
41
|
+
- Switch the active time step by `TIME_ID` (`update`).
|
|
42
|
+
- Inspect the current date/time (`display`, `get_date_in_the_past`).
|
|
43
|
+
- Generate a timeline view up to (or strictly before) the current step
|
|
44
|
+
(`generate_timeline`).
|
|
45
|
+
- Prune older steps and renumber so the earliest remaining step becomes 1
|
|
46
|
+
(`prune_time`).
|
|
47
|
+
- Clone from another `TimeManager` (soft link or hard copy) and optionally
|
|
48
|
+
take ownership of the hidden table (`clone_timer`, `take_ownership`).
|
|
49
|
+
- Introspect the active step by parsing the view DDL (`get_current_timeid`,
|
|
50
|
+
`print_view_ddl`).
|
|
51
|
+
|
|
52
|
+
Workflow overview
|
|
53
|
+
1) Instantiate `TimeManager` with a target `view_name` and `schema_name`.
|
|
54
|
+
2) Call `load_time_steps(df, time_column)` to (re)create the hidden table and
|
|
55
|
+
point the public view at `TIME_ID = 1`.
|
|
56
|
+
3) Use `update(time_id)` to switch the active business date.
|
|
57
|
+
4) (Optional) Create derivative timeline views, prune older steps, or clone.
|
|
58
|
+
|
|
59
|
+
Parameters
|
|
60
|
+
table_name (str): Base public view name to manage (e.g., "MY_VIEW").
|
|
61
|
+
schema_name (str): Teradata schema/database that holds the artifacts.
|
|
62
|
+
|
|
63
|
+
Attributes
|
|
64
|
+
schema_name (str): Target schema for the view and hidden table.
|
|
65
|
+
table_name (str): Hidden table name (`<view_name>_HIDDEN`).
|
|
66
|
+
view_name (str): Public view name (`<view_name>`).
|
|
67
|
+
time_id (str): Name of the step identifier column (default: "time_id").
|
|
68
|
+
nb_time_steps (int | None): Number of steps detected after load/inspection.
|
|
69
|
+
data_type (str | None): SQL data type of `BUSINESS_DATE` (e.g., `DATE`,
|
|
70
|
+
`TIMESTAMP WITH TIME ZONE`), inferred during load/inspection.
|
|
71
|
+
|
|
72
|
+
Notes
|
|
73
|
+
- On initialization, if the hidden table already exists, metadata
|
|
74
|
+
(`data_type`, `nb_time_steps`) is auto-detected.
|
|
75
|
+
- `load_time_steps` will drop and recreate the hidden table to match the
|
|
76
|
+
inferred schema, then rebuild the public view.
|
|
77
|
+
- “Soft” cloning points this manager’s view at the source hidden table;
|
|
78
|
+
“hard” cloning copies the table into this schema and marks it owned.
|
|
79
|
+
- Ownership controls whether `_drop()` is allowed to remove the hidden
|
|
80
|
+
table (use `take_ownership` to promote ownership when appropriate).
|
|
18
81
|
"""
|
|
19
82
|
|
|
20
|
-
def __init__(self, table_name, schema_name):
|
|
21
|
-
"""
|
|
22
|
-
Initializes the TimeManager with a table name, schema name, and optionally a data type.
|
|
83
|
+
def __init__(self, table_name: str, schema_name: str) -> None:
|
|
84
|
+
"""Initialize a TimeManager for an existing or future hidden table/view.
|
|
23
85
|
|
|
24
|
-
|
|
86
|
+
On initialization, if the hidden table already exists, the instance
|
|
87
|
+
inspects it to populate ``data_type`` and ``nb_time_steps``.
|
|
25
88
|
|
|
26
89
|
Args:
|
|
27
|
-
table_name
|
|
28
|
-
schema_name
|
|
29
|
-
data_type (str, optional): Type of the date/time data. Defaults to 'DATE'.
|
|
90
|
+
table_name: Base public view name to manage (e.g., ``"MY_VIEW"``).
|
|
91
|
+
schema_name: Schema that contains/should contain the objects.
|
|
30
92
|
"""
|
|
31
93
|
self.schema_name = schema_name
|
|
32
94
|
self.table_name = get_hidden_table_name(table_name)
|
|
@@ -35,39 +97,52 @@ class TimeManager:
|
|
|
35
97
|
self.nb_time_steps = None
|
|
36
98
|
self.data_type = None
|
|
37
99
|
|
|
100
|
+
logger.debug(
|
|
101
|
+
"Initializing TimeManager for schema=%s, view=%s, table=%s",
|
|
102
|
+
self.schema_name, self.view_name, self.table_name
|
|
103
|
+
)
|
|
104
|
+
|
|
38
105
|
if self._exists():
|
|
106
|
+
logger.debug("Hidden table %s.%s exists; inspecting metadata.", self.schema_name, self.table_name)
|
|
39
107
|
df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
40
108
|
d_ = {x[0]: x[1] for x in df._td_column_names_and_types}
|
|
41
|
-
self.data_type = d_
|
|
42
|
-
self.nb_time_steps
|
|
43
|
-
f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}"
|
|
44
|
-
|
|
109
|
+
self.data_type = d_.get('BUSINESS_DATE')
|
|
110
|
+
self.nb_time_steps = tdml.execute_sql(
|
|
111
|
+
f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}"
|
|
112
|
+
).fetchall()[0][0]
|
|
113
|
+
logger.info(
|
|
114
|
+
"Detected BUSINESS_DATE data_type=%s with nb_time_steps=%s",
|
|
115
|
+
self.data_type, self.nb_time_steps
|
|
116
|
+
)
|
|
45
117
|
|
|
46
|
-
def load_time_steps(self, df, time_column):
|
|
47
|
-
"""
|
|
48
|
-
Load time steps into the table and update the view accordingly.
|
|
118
|
+
def load_time_steps(self, df: pd.DataFrame, time_column: str) -> None:
|
|
119
|
+
"""Load/replace the hidden table and (re)point the public view to step 1.
|
|
49
120
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
121
|
+
Workflow:
|
|
122
|
+
1) Build a DataFrame with sequential ``TIME_ID`` and ``BUSINESS_DATE``.
|
|
123
|
+
2) Infer SQL types and upcast to ``TIMESTAMP WITH TIME ZONE`` when
|
|
124
|
+
needed (to preserve offsets).
|
|
125
|
+
3) Drop and recreate the hidden table with inferred schema.
|
|
126
|
+
4) Append the rows.
|
|
127
|
+
5) Replace the public view to expose ``TIME_ID = 1``.
|
|
128
|
+
6) Store ``nb_time_steps``.
|
|
57
129
|
|
|
58
130
|
Args:
|
|
59
|
-
df
|
|
60
|
-
time_column
|
|
131
|
+
df: Input pandas DataFrame with a time column.
|
|
132
|
+
time_column: Name of the time column in ``df`` to use as ``BUSINESS_DATE``.
|
|
61
133
|
"""
|
|
134
|
+
logger.info("Loading time steps into %s.%s from column '%s'.",
|
|
135
|
+
self.schema_name, self.table_name, time_column)
|
|
62
136
|
|
|
63
|
-
# Step 1: Build DataFrame with
|
|
137
|
+
# Step 1: Build DataFrame with TIME_ID and BUSINESS_DATE
|
|
64
138
|
df_ = df.assign(
|
|
65
139
|
time_id=tdml.sqlalchemy.literal_column(
|
|
66
|
-
f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
|
|
140
|
+
f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
|
|
67
141
|
tdml.BIGINT()
|
|
68
142
|
),
|
|
69
143
|
BUSINESS_DATE=df[time_column]
|
|
70
144
|
)[["time_id", "BUSINESS_DATE"]]
|
|
145
|
+
logger.debug("Constructed intermediate DataFrame with TIME_ID and BUSINESS_DATE.")
|
|
71
146
|
|
|
72
147
|
# Step 2: Get SQL types and adjust BUSINESS_DATE if necessary
|
|
73
148
|
sql_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_)
|
|
@@ -75,9 +150,9 @@ class TimeManager:
|
|
|
75
150
|
|
|
76
151
|
if "TIMESTAMP" in type_business_date.upper() and "ZONE" not in type_business_date.upper():
|
|
77
152
|
new_type = f"{type_business_date} WITH TIME ZONE"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
153
|
+
logger.info(
|
|
154
|
+
"Upcasting BUSINESS_DATE from %s to %s to preserve timezone.",
|
|
155
|
+
type_business_date, new_type
|
|
81
156
|
)
|
|
82
157
|
type_business_date = new_type
|
|
83
158
|
sql_types["BUSINESS_DATE"] = new_type
|
|
@@ -89,13 +164,20 @@ class TimeManager:
|
|
|
89
164
|
)
|
|
90
165
|
|
|
91
166
|
self.data_type = type_business_date
|
|
167
|
+
logger.debug("Final BUSINESS_DATE SQL type: %s", self.data_type)
|
|
92
168
|
|
|
93
169
|
# Step 3: Drop table if it exists
|
|
94
170
|
try:
|
|
95
171
|
tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
|
|
172
|
+
logger.debug("Dropped existing table %s.%s (if existed).", self.schema_name, self.table_name)
|
|
96
173
|
except Exception as e:
|
|
174
|
+
# Not fatal; the table might not exist. Log at debug when in dev, warning otherwise.
|
|
175
|
+
e_str = str(e).split('\n')[0]
|
|
176
|
+
msg = f"Error dropping table {self.schema_name}.{self.table_name}: {e_str}"
|
|
97
177
|
if tdfs4ds.DEBUG_MODE:
|
|
98
|
-
|
|
178
|
+
logger.debug(msg)
|
|
179
|
+
else:
|
|
180
|
+
logger.warning(msg)
|
|
99
181
|
|
|
100
182
|
# Step 4: Recreate table
|
|
101
183
|
ddl = ",\n".join([f"{col} {dtype}" for col, dtype in sql_types.items()])
|
|
@@ -106,6 +188,7 @@ class TimeManager:
|
|
|
106
188
|
PRIMARY INDEX (time_id)
|
|
107
189
|
"""
|
|
108
190
|
tdml.execute_sql(create_table_sql)
|
|
191
|
+
logger.info("Created table %s.%s with schema: %s", self.schema_name, self.table_name, sql_types)
|
|
109
192
|
|
|
110
193
|
# Step 5: Insert data
|
|
111
194
|
df_[list(sql_types.keys())].to_sql(
|
|
@@ -113,6 +196,7 @@ class TimeManager:
|
|
|
113
196
|
schema_name=self.schema_name,
|
|
114
197
|
if_exists="append"
|
|
115
198
|
)
|
|
199
|
+
logger.info("Inserted %s time steps into %s.%s.", df_.shape[0], self.schema_name, self.table_name)
|
|
116
200
|
|
|
117
201
|
# Step 6: Update view
|
|
118
202
|
create_view_sql = f"""
|
|
@@ -122,40 +206,72 @@ class TimeManager:
|
|
|
122
206
|
WHERE time_id = 1
|
|
123
207
|
"""
|
|
124
208
|
tdml.execute_sql(create_view_sql)
|
|
209
|
+
logger.debug("Replaced view %s.%s to point at TIME_ID=1.", self.schema_name, self.view_name)
|
|
125
210
|
|
|
126
211
|
# Step 7: Store number of time steps
|
|
127
212
|
result = tdml.execute_sql(
|
|
128
213
|
f"SELECT MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}"
|
|
129
214
|
).fetchall()
|
|
130
215
|
self.nb_time_steps = result[0][0]
|
|
216
|
+
logger.info("Time steps loaded. nb_time_steps=%s", self.nb_time_steps)
|
|
131
217
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
"""
|
|
135
|
-
Checks if the table exists in the database.
|
|
218
|
+
def _exists(self) -> bool:
|
|
219
|
+
"""Check if the hidden table exists in the schema.
|
|
136
220
|
|
|
137
221
|
Returns:
|
|
138
|
-
|
|
222
|
+
True if the hidden table exists; False otherwise.
|
|
139
223
|
"""
|
|
224
|
+
exists = len([
|
|
225
|
+
x for x in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
|
|
226
|
+
if x.lower().replace('"', '') == self.table_name.lower()
|
|
227
|
+
]) > 0
|
|
228
|
+
logger.debug("Hidden table %s.%s exists? %s", self.schema_name, self.table_name, exists)
|
|
229
|
+
return exists
|
|
140
230
|
|
|
141
|
-
|
|
142
|
-
|
|
231
|
+
def _drop(self, drop_view: bool = False, force: bool = False) -> None:
|
|
232
|
+
"""Drop the hidden table if we own it, and optionally the public view.
|
|
143
233
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
234
|
+
Args:
|
|
235
|
+
drop_view: If True, also drop the public view.
|
|
236
|
+
force: If True, drop the hidden table even if we don't own it.
|
|
237
|
+
|
|
238
|
+
Notes:
|
|
239
|
+
- The hidden table is dropped only if:
|
|
240
|
+
* self._owns_hidden is True, or
|
|
241
|
+
* force is True.
|
|
242
|
+
- The view can be dropped regardless of ownership when drop_view=True.
|
|
147
243
|
"""
|
|
148
|
-
# Drop
|
|
244
|
+
# Drop hidden table
|
|
149
245
|
if self._exists():
|
|
150
|
-
|
|
246
|
+
if getattr(self, "_owns_hidden", False) or force:
|
|
247
|
+
logger.info(
|
|
248
|
+
"Dropping hidden table %s.%s (force=%s).",
|
|
249
|
+
self.schema_name, self.table_name, force
|
|
250
|
+
)
|
|
251
|
+
tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
|
|
252
|
+
else:
|
|
253
|
+
logger.warning(
|
|
254
|
+
"Refusing to drop hidden table %s.%s because this manager does not own it. "
|
|
255
|
+
"Use force=True to override.",
|
|
256
|
+
self.schema_name, self.table_name
|
|
257
|
+
)
|
|
258
|
+
else:
|
|
259
|
+
logger.debug("Hidden table %s.%s does not exist.", self.schema_name, self.table_name)
|
|
151
260
|
|
|
261
|
+
# Optionally drop view
|
|
262
|
+
if drop_view:
|
|
263
|
+
try:
|
|
264
|
+
logger.info("Dropping view %s.%s.", self.schema_name, self.view_name)
|
|
265
|
+
tdml.execute_sql(f"DROP VIEW {self.schema_name}.{self.view_name}")
|
|
266
|
+
except Exception as e:
|
|
267
|
+
logger.warning("Error dropping view %s.%s: %s", self.schema_name, self.view_name, e)
|
|
152
268
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
269
|
+
|
|
270
|
+
def update(self, time_id: int) -> None:
|
|
271
|
+
"""Point the public view at a specific ``TIME_ID``.
|
|
156
272
|
|
|
157
273
|
Args:
|
|
158
|
-
|
|
274
|
+
time_id: The time step identifier to expose via the public view.
|
|
159
275
|
"""
|
|
160
276
|
if self._exists():
|
|
161
277
|
query = f"""
|
|
@@ -164,32 +280,44 @@ class TimeManager:
|
|
|
164
280
|
FROM {self.schema_name}.{self.table_name}
|
|
165
281
|
WHERE TIME_ID = {time_id}
|
|
166
282
|
"""
|
|
283
|
+
if getattr(tdfs4ds, "DEBUG_MODE", False):
|
|
284
|
+
logger_safe("debug", "Executing view update:\n%s", query)
|
|
167
285
|
|
|
168
|
-
if tdfs4ds.DEBUG_MODE:
|
|
169
|
-
print(query)
|
|
170
286
|
tdml.execute_sql(query)
|
|
287
|
+
logger_safe("info", "Updated view %s.%s to TIME_ID=%s.", self.schema_name, self.view_name, time_id)
|
|
171
288
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
289
|
+
else:
|
|
290
|
+
logger_safe(
|
|
291
|
+
"warning",
|
|
292
|
+
"Cannot update view: hidden table %s.%s does not exist.",
|
|
293
|
+
self.schema_name, self.table_name
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
def display(self) -> pd.DataFrame:
|
|
297
|
+
"""Return the current public view (one row: current BUSINESS_DATE).
|
|
175
298
|
|
|
176
299
|
Returns:
|
|
177
|
-
DataFrame
|
|
300
|
+
A pandas DataFrame with the current ``BUSINESS_DATE`` exposed by the view.
|
|
178
301
|
"""
|
|
179
|
-
|
|
302
|
+
logger.debug("Reading current BUSINESS_DATE from %s.%s.", self.schema_name, self.view_name)
|
|
180
303
|
cols = tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name)).columns
|
|
181
|
-
return pd.DataFrame(
|
|
304
|
+
return pd.DataFrame(
|
|
305
|
+
tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall(),
|
|
306
|
+
columns=cols
|
|
307
|
+
)
|
|
182
308
|
|
|
183
|
-
def get_date_in_the_past(self):
|
|
184
|
-
"""
|
|
185
|
-
|
|
309
|
+
def get_date_in_the_past(self) -> str | None:
|
|
310
|
+
"""Return the earliest BUSINESS_DATE from the public view as a string.
|
|
311
|
+
|
|
312
|
+
The format includes timezone offset when applicable:
|
|
313
|
+
- ``YYYY-MM-DD HH:MM:SS±HH:MM`` if timezone info is present;
|
|
314
|
+
- otherwise ``YYYY-MM-DD HH:MM:SS``.
|
|
186
315
|
|
|
187
316
|
Returns:
|
|
188
|
-
|
|
189
|
-
('YYYY-MM-DD HH:MM:SS±HH:MM' if timezone is available, else 'YYYY-MM-DD HH:MM:SS').
|
|
317
|
+
The formatted earliest date/time string, or ``None`` if parsing fails.
|
|
190
318
|
"""
|
|
191
|
-
# Use iloc to preserve timezone awareness from pandas
|
|
192
319
|
date_obj = self.display().BUSINESS_DATE.iloc[0]
|
|
320
|
+
logger.debug("Raw earliest BUSINESS_DATE value read: %r (%s)", date_obj, type(date_obj))
|
|
193
321
|
|
|
194
322
|
if isinstance(date_obj, pd.Timestamp):
|
|
195
323
|
datetime_obj = date_obj.to_pydatetime()
|
|
@@ -200,68 +328,390 @@ class TimeManager:
|
|
|
200
328
|
elif isinstance(date_obj, np.datetime64):
|
|
201
329
|
datetime_obj = pd.to_datetime(date_obj).to_pydatetime()
|
|
202
330
|
else:
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
print('temp type', type(date_obj))
|
|
206
|
-
return
|
|
331
|
+
logger.warning("Unrecognized BUSINESS_DATE type: %s; value=%r", type(date_obj), date_obj)
|
|
332
|
+
return None
|
|
207
333
|
|
|
208
|
-
# Format with timezone offset if available
|
|
209
334
|
if datetime_obj.tzinfo is not None and datetime_obj.tzinfo.utcoffset(datetime_obj) is not None:
|
|
210
335
|
output_string = datetime_obj.isoformat(sep=' ', timespec='seconds')
|
|
211
336
|
else:
|
|
212
337
|
output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
|
|
213
338
|
|
|
339
|
+
logger.debug("Formatted earliest BUSINESS_DATE: %s", output_string)
|
|
214
340
|
return output_string
|
|
215
341
|
|
|
216
|
-
def get_list_date(self):
|
|
217
|
-
"""
|
|
218
|
-
Retrieve a list of dates from the specified table.
|
|
219
|
-
|
|
220
|
-
This function returns a DataFrame containing the dates from the table specified by the schema_name and table_name attributes of the class.
|
|
342
|
+
def get_list_date(self) -> tdml.DataFrame:
|
|
343
|
+
"""Return the full list of time steps from the hidden table.
|
|
221
344
|
|
|
222
345
|
Returns:
|
|
223
|
-
|
|
346
|
+
A Teradata DataFrame over ``schema.table`` (hidden table) with
|
|
347
|
+
``TIME_ID`` and ``BUSINESS_DATE``.
|
|
224
348
|
"""
|
|
349
|
+
logger.debug("Returning Teradata DataFrame for %s.%s.", self.schema_name, self.table_name)
|
|
225
350
|
return tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
226
351
|
|
|
227
|
-
def generate_timeline(self, schema_name, view_name, current_included=True):
|
|
228
|
-
"""
|
|
229
|
-
Generate a timeline view based on business dates from a hidden source view.
|
|
352
|
+
def generate_timeline(self, schema_name: str, view_name: str, current_included: bool = True) -> tdml.DataFrame:
|
|
353
|
+
"""Create a timeline view filtered relative to the current business date.
|
|
230
354
|
|
|
231
|
-
|
|
232
|
-
|
|
355
|
+
The new view (``schema_name.view_name``) selects dates from the hidden
|
|
356
|
+
source (``self.view_name + '_HIDDEN'``) up to the current business date
|
|
357
|
+
exposed by the public view (``self.view_name``).
|
|
233
358
|
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
359
|
+
Args:
|
|
360
|
+
schema_name: Schema where the new timeline view will be created.
|
|
361
|
+
view_name: Name of the new timeline view.
|
|
362
|
+
current_included: If True, include the current business date;
|
|
363
|
+
otherwise, exclude it.
|
|
239
364
|
|
|
240
365
|
Returns:
|
|
241
|
-
|
|
366
|
+
A Teradata DataFrame bound to the newly replaced timeline view.
|
|
242
367
|
"""
|
|
243
|
-
|
|
368
|
+
logger.info(
|
|
369
|
+
"Generating timeline view %s.%s (current_included=%s).",
|
|
370
|
+
schema_name, view_name, current_included
|
|
371
|
+
)
|
|
244
372
|
query = f"""
|
|
245
373
|
REPLACE VIEW {schema_name}.{view_name} AS
|
|
246
374
|
SEL BUSINESS_DATE
|
|
247
375
|
FROM {self.schema_name}.{self.view_name + '_HIDDEN'} A
|
|
248
376
|
"""
|
|
249
|
-
# Modify the query based on whether the current business date should be included
|
|
250
377
|
if current_included:
|
|
251
378
|
query += f"WHERE BUSINESS_DATE <= (SELECT BUSINESS_DATE FROM {self.schema_name}.{self.view_name})"
|
|
252
379
|
else:
|
|
253
380
|
query += f"WHERE BUSINESS_DATE < (SELECT BUSINESS_DATE FROM {self.schema_name}.{self.view_name})"
|
|
254
381
|
|
|
255
|
-
# Execute the query to create the view
|
|
256
382
|
tdml.execute_sql(query)
|
|
257
|
-
|
|
383
|
+
logger.debug("Replaced timeline view with query:\n%s", query.strip())
|
|
258
384
|
return tdml.DataFrame(tdml.in_schema(schema_name, view_name))
|
|
259
385
|
|
|
260
|
-
def get_current_step(self):
|
|
386
|
+
def get_current_step(self) -> int | None:
|
|
387
|
+
"""Return the TIME_ID corresponding to the current BUSINESS_DATE in the view.
|
|
261
388
|
|
|
262
|
-
|
|
389
|
+
Returns:
|
|
390
|
+
The current ``TIME_ID`` if exactly one match is found; otherwise ``None``.
|
|
391
|
+
"""
|
|
392
|
+
# Note: original code omits schema qualifiers here; kept intentionally.
|
|
393
|
+
logger.debug("Fetching current TIME_ID from %s and %s.", self.table_name, self.view_name)
|
|
394
|
+
res = tdml.execute_sql(
|
|
395
|
+
f"SELECT TIME_ID FROM {self.table_name} "
|
|
396
|
+
f"WHERE BUSINESS_DATE = (SELECT BUSINESS_DATE FROM {self.view_name})"
|
|
397
|
+
).fetchall()
|
|
263
398
|
|
|
264
|
-
if len(res)==1:
|
|
399
|
+
if len(res) == 1:
|
|
400
|
+
logger.info("Current TIME_ID resolved to %s.", res[0][0])
|
|
265
401
|
return res[0][0]
|
|
266
402
|
|
|
267
|
-
|
|
403
|
+
logger.warning("Could not resolve a unique current TIME_ID (rows returned: %s).", len(res))
|
|
404
|
+
return None
|
|
405
|
+
|
|
406
|
+
def clone_timer(
|
|
407
|
+
self,
|
|
408
|
+
source_timemanager,
|
|
409
|
+
time_id_to_apply: int = 1,
|
|
410
|
+
take_ownership: bool = False,
|
|
411
|
+
clone_mode: str = "soft",
|
|
412
|
+
if_exists: str = "error",
|
|
413
|
+
):
|
|
414
|
+
"""
|
|
415
|
+
Clone time-step definitions from another TimeManager.
|
|
416
|
+
|
|
417
|
+
Supports:
|
|
418
|
+
- soft clone (default): point this manager's view to the source _HIDDEN table
|
|
419
|
+
- hard clone: copy the source _HIDDEN table into this schema and own the copy
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
source_timemanager (TimeManager): Source manager to clone from.
|
|
423
|
+
time_id_to_apply (int, optional): TIME_ID to activate in the public view. Default: 1.
|
|
424
|
+
take_ownership (bool, optional): For soft clones only, whether this
|
|
425
|
+
manager should consider itself the owner of the hidden table.
|
|
426
|
+
(Hard clones always own their copy.) Default: False.
|
|
427
|
+
clone_mode (str, optional): "soft" or "hard". Default: "soft".
|
|
428
|
+
if_exists (str, optional): What to do if the destination hidden table already exists
|
|
429
|
+
- "error" (default): raise an exception
|
|
430
|
+
- "replace": drop and recreate
|
|
431
|
+
- "skip": reuse existing table
|
|
432
|
+
|
|
433
|
+
Returns:
|
|
434
|
+
TimeManager: self
|
|
435
|
+
|
|
436
|
+
Raises:
|
|
437
|
+
ValueError: On invalid clone_mode/if_exists or missing source table.
|
|
438
|
+
RuntimeError: If destination exists and if_exists="error".
|
|
439
|
+
"""
|
|
440
|
+
if clone_mode not in ("soft", "hard"):
|
|
441
|
+
raise ValueError("clone_mode must be 'soft' or 'hard'")
|
|
442
|
+
if if_exists not in ("error", "replace", "skip"):
|
|
443
|
+
raise ValueError("if_exists must be 'error', 'replace', or 'skip'")
|
|
444
|
+
|
|
445
|
+
src_schema = source_timemanager.schema_name
|
|
446
|
+
src_hidden = source_timemanager.table_name
|
|
447
|
+
|
|
448
|
+
logger.info(
|
|
449
|
+
"Cloning timer",
|
|
450
|
+
extra={
|
|
451
|
+
"mode": clone_mode,
|
|
452
|
+
"source": f"{src_schema}.{src_hidden}",
|
|
453
|
+
"target_view": f"{self.schema_name}.{self.view_name}",
|
|
454
|
+
},
|
|
455
|
+
)
|
|
456
|
+
|
|
457
|
+
# Validate source existence
|
|
458
|
+
existing_src = [t.lower() for t in tdml.db_list_tables(schema_name=src_schema).TableName.values]
|
|
459
|
+
if src_hidden.lower() not in existing_src:
|
|
460
|
+
raise ValueError(f"Source hidden timer table {src_schema}.{src_hidden} does not exist.")
|
|
461
|
+
|
|
462
|
+
if clone_mode == "hard":
|
|
463
|
+
# Hard clone → create (or reuse) a NEW hidden table in this schema
|
|
464
|
+
self.table_name = get_hidden_table_name(self.view_name)
|
|
465
|
+
existing_dest = [t.lower() for t in tdml.db_list_tables(schema_name=self.schema_name).TableName.values]
|
|
466
|
+
|
|
467
|
+
if self.table_name.lower() in existing_dest:
|
|
468
|
+
if if_exists == "error":
|
|
469
|
+
raise RuntimeError(f"Target table {self.schema_name}.{self.table_name} already exists.")
|
|
470
|
+
elif if_exists == "replace":
|
|
471
|
+
logger.warning("Replacing existing table %s.%s", self.schema_name, self.table_name)
|
|
472
|
+
tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
|
|
473
|
+
elif if_exists == "skip":
|
|
474
|
+
logger.info("Skipping clone, using existing %s.%s", self.schema_name, self.table_name)
|
|
475
|
+
|
|
476
|
+
if self.table_name.lower() not in existing_dest or if_exists == "replace":
|
|
477
|
+
logger.info("Creating cloned table %s.%s", self.schema_name, self.table_name)
|
|
478
|
+
create_sql = f"""
|
|
479
|
+
CREATE TABLE {self.schema_name}.{self.table_name} AS
|
|
480
|
+
(SELECT * FROM {src_schema}.{src_hidden})
|
|
481
|
+
WITH DATA
|
|
482
|
+
"""
|
|
483
|
+
tdml.execute_sql(create_sql)
|
|
484
|
+
|
|
485
|
+
self._owns_hidden = True
|
|
486
|
+
target_schema = self.schema_name
|
|
487
|
+
|
|
488
|
+
else:
|
|
489
|
+
# Soft clone → just point to the source hidden table
|
|
490
|
+
logger.info("Soft clone: linking view to source hidden table")
|
|
491
|
+
self.table_name = src_hidden
|
|
492
|
+
self._owns_hidden = bool(take_ownership)
|
|
493
|
+
target_schema = src_schema # view will select from the source schema
|
|
494
|
+
|
|
495
|
+
# Load metadata from the target hidden table
|
|
496
|
+
df_meta = tdml.DataFrame(tdml.in_schema(target_schema, self.table_name))
|
|
497
|
+
# Get data type for BUSINESS_DATE (if present)
|
|
498
|
+
try:
|
|
499
|
+
dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
|
|
500
|
+
self.data_type = dtypes.get("BUSINESS_DATE")
|
|
501
|
+
except Exception:
|
|
502
|
+
self.data_type = None
|
|
503
|
+
|
|
504
|
+
self.nb_time_steps = tdml.execute_sql(
|
|
505
|
+
f"SEL MAX(TIME_ID) FROM {target_schema}.{self.table_name}"
|
|
506
|
+
).fetchall()[0][0]
|
|
507
|
+
|
|
508
|
+
# Rebuild the public view to the requested TIME_ID
|
|
509
|
+
view_sql = f"""
|
|
510
|
+
REPLACE VIEW {self.schema_name}.{self.view_name} AS
|
|
511
|
+
SELECT BUSINESS_DATE
|
|
512
|
+
FROM {target_schema}.{self.table_name}
|
|
513
|
+
WHERE TIME_ID = {int(time_id_to_apply)}
|
|
514
|
+
"""
|
|
515
|
+
tdml.execute_sql(view_sql)
|
|
516
|
+
|
|
517
|
+
logger.info(
|
|
518
|
+
"Timer clone complete → Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
|
|
519
|
+
time_id_to_apply, self.nb_time_steps, self.data_type
|
|
520
|
+
)
|
|
521
|
+
return self
|
|
522
|
+
|
|
523
|
+
def take_ownership(
|
|
524
|
+
self,
|
|
525
|
+
create_copy: bool = True,
|
|
526
|
+
if_exists: str = "error",
|
|
527
|
+
) -> "TimeManager":
|
|
528
|
+
"""Promote this manager to OWN the hidden table.
|
|
529
|
+
|
|
530
|
+
Two modes:
|
|
531
|
+
- create_copy=True (default): Hard-promote by copying the current source
|
|
532
|
+
hidden table into this manager's schema as <view>_HIDDEN, repoint the
|
|
533
|
+
view, and set ownership.
|
|
534
|
+
- create_copy=False: Only mark as owned if the current hidden table is
|
|
535
|
+
already <schema=self.schema_name, table=<view>_HIDDEN>. Otherwise, warn.
|
|
536
|
+
|
|
537
|
+
Args:
|
|
538
|
+
create_copy: If True, copy data into this schema and repoint view.
|
|
539
|
+
if_exists: Behavior when the destination <view>_HIDDEN already exists:
|
|
540
|
+
- "error" (default): raise
|
|
541
|
+
- "replace": drop & recreate
|
|
542
|
+
- "skip": reuse existing
|
|
543
|
+
|
|
544
|
+
Returns:
|
|
545
|
+
TimeManager: self
|
|
546
|
+
"""
|
|
547
|
+
if if_exists not in ("error", "replace", "skip"):
|
|
548
|
+
raise ValueError("if_exists must be 'error', 'replace', or 'skip'")
|
|
549
|
+
|
|
550
|
+
# Figure out current active TIME_ID to preserve selection after repointing
|
|
551
|
+
try:
|
|
552
|
+
current_time_id = self.get_current_step()
|
|
553
|
+
except Exception:
|
|
554
|
+
current_time_id = None
|
|
555
|
+
if current_time_id is None:
|
|
556
|
+
current_time_id = 1
|
|
557
|
+
|
|
558
|
+
dest_table = get_hidden_table_name(self.view_name)
|
|
559
|
+
|
|
560
|
+
if not create_copy:
|
|
561
|
+
# Only mark as owned if we already match <schema, view_HIDDEN>
|
|
562
|
+
if self.schema_name and self.table_name == dest_table:
|
|
563
|
+
logger.info(
|
|
564
|
+
"Marking existing hidden table %s.%s as owned (no copy).",
|
|
565
|
+
self.schema_name, self.table_name
|
|
566
|
+
)
|
|
567
|
+
self._owns_hidden = True
|
|
568
|
+
return self
|
|
569
|
+
logger.warning(
|
|
570
|
+
"Cannot take ownership without copying: current table is %s (expected %s). "
|
|
571
|
+
"Re-run with create_copy=True to copy into %s.%s.",
|
|
572
|
+
self.table_name, dest_table, self.schema_name, dest_table
|
|
573
|
+
)
|
|
574
|
+
return self
|
|
575
|
+
|
|
576
|
+
# We will copy data into <self.schema_name>.<view>_HIDDEN
|
|
577
|
+
dest_exists = [
|
|
578
|
+
t.lower() for t in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
|
|
579
|
+
]
|
|
580
|
+
need_create = True
|
|
581
|
+
|
|
582
|
+
if dest_table.lower() in dest_exists:
|
|
583
|
+
if if_exists == "error":
|
|
584
|
+
raise RuntimeError(f"Destination table {self.schema_name}.{dest_table} already exists.")
|
|
585
|
+
elif if_exists == "replace":
|
|
586
|
+
logger.warning("Replacing existing table %s.%s", self.schema_name, dest_table)
|
|
587
|
+
tdml.db_drop_table(schema_name=self.schema_name, table_name=dest_table)
|
|
588
|
+
elif if_exists == "skip":
|
|
589
|
+
logger.info("Reusing existing destination table %s.%s", self.schema_name, dest_table)
|
|
590
|
+
need_create = False
|
|
591
|
+
|
|
592
|
+
if need_create:
|
|
593
|
+
logger.info(
|
|
594
|
+
"Creating owned copy %s.%s from current source %s.%s",
|
|
595
|
+
self.schema_name, dest_table, self.schema_name, self.table_name
|
|
596
|
+
)
|
|
597
|
+
# The current table might be in another schema; qualify from the DataFrame binding
|
|
598
|
+
# Derive the true source schema for safety
|
|
599
|
+
# (If you know it's always schema-qualified in self.table_name, keep as-is.)
|
|
600
|
+
src_schema = self.schema_name if self._exists() else None
|
|
601
|
+
# Fallback to probing the DataFrame binding for schema
|
|
602
|
+
if src_schema is None:
|
|
603
|
+
logger.debug("Could not verify source schema via _exists(); defaulting to self.schema_name.")
|
|
604
|
+
src_schema = self.schema_name
|
|
605
|
+
|
|
606
|
+
create_sql = f"""
|
|
607
|
+
CREATE TABLE {self.schema_name}.{dest_table} AS
|
|
608
|
+
(SELECT * FROM {src_schema}.{self.table_name})
|
|
609
|
+
WITH DATA
|
|
610
|
+
"""
|
|
611
|
+
tdml.execute_sql(create_sql)
|
|
612
|
+
|
|
613
|
+
# Repoint this manager to the new owned table and rebuild the view
|
|
614
|
+
self.table_name = dest_table
|
|
615
|
+
self._owns_hidden = True
|
|
616
|
+
|
|
617
|
+
view_sql = f"""
|
|
618
|
+
REPLACE VIEW {self.schema_name}.{self.view_name} AS
|
|
619
|
+
SELECT BUSINESS_DATE
|
|
620
|
+
FROM {self.schema_name}.{self.table_name}
|
|
621
|
+
WHERE TIME_ID = {int(current_time_id)}
|
|
622
|
+
"""
|
|
623
|
+
tdml.execute_sql(view_sql)
|
|
624
|
+
|
|
625
|
+
# Refresh metadata
|
|
626
|
+
try:
|
|
627
|
+
df_meta = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
628
|
+
dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
|
|
629
|
+
self.data_type = dtypes.get("BUSINESS_DATE")
|
|
630
|
+
except Exception:
|
|
631
|
+
pass
|
|
632
|
+
self.nb_time_steps = tdml.execute_sql(
|
|
633
|
+
f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
|
|
634
|
+
).fetchall()[0][0]
|
|
635
|
+
|
|
636
|
+
logger.info(
|
|
637
|
+
"Ownership taken for %s.%s. Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
|
|
638
|
+
self.schema_name, self.table_name, current_time_id, self.nb_time_steps, self.data_type
|
|
639
|
+
)
|
|
640
|
+
return self
|
|
641
|
+
|
|
642
|
+
def get_current_timeid(self) -> int:
    """
    Extract the currently active TIME_ID from the public view's DDL.

    The view definition is fetched with ``tdfs4ds.utils.lineage.get_ddl`` for
    ``self.schema_name`` / ``self.view_name`` and searched for a
    ``WHERE TIME_ID = <number>`` filter (case-insensitive, flexible whitespace).

    Returns:
        int: TIME_ID parsed from the view definition.

    Raises:
        ValueError: If the TIME_ID cannot be parsed from the DDL.
    """
    logger.debug("Reading view DDL to extract current TIME_ID")
    txt = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)

    # Look for "WHERE TIME_ID = <number>" (allow whitespace/case variations)
    m = re.search(r"WHERE\s+TIME_ID\s*=\s*(\d+)", txt, flags=re.IGNORECASE)
    if not m:
        # No exception is being handled at this point, so logger.exception()
        # would attach a meaningless "NoneType: None" traceback to the record.
        # Log a plain error and let the ValueError below carry the failure.
        logger.error("Failed to parse TIME_ID from view DDL")
        raise ValueError("Unable to parse current TIME_ID from view DDL.")
    current = int(m.group(1))
    logger.info("Current TIME_ID extracted", extra={"time_id": current})
    return current
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def print_view_ddl(self) -> None:
    """
    Write the public view's definition (DDL) to the log at INFO level.

    The DDL is obtained from ``tdfs4ds.utils.lineage.get_ddl`` for the view
    identified by ``self.schema_name`` and ``self.view_name``. Nothing is
    returned; the method exists for troubleshooting and traceability.
    """
    view_definition = tdfs4ds.utils.lineage.get_ddl(
        schema_name=self.schema_name,
        view_name=self.view_name,
    )
    logger.info("View DDL:\n%s", view_definition)
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def prune_time(self, time_id: int | None = None):
    """
    Remove all time steps with TIME_ID lower than `time_id` and renumber remaining ones.

    If `time_id` is omitted, the method uses the current TIME_ID from the view.
    After the delete, every remaining TIME_ID is shifted down by ``time_id - 1``,
    so the row whose TIME_ID was equal to `time_id` becomes 1, and the public
    view is repointed to TIME_ID=1 through ``self.update(1)``.

    The delete, the renumbering and the view update are issued as separate
    statements.

    Args:
        time_id (int, optional): Threshold id; rows with TIME_ID < time_id are deleted.
            Must be >= 1.

    Returns:
        TimeManager: Self, to allow method chaining.

    Raises:
        ValueError: If the threshold is lower than 1. With such a value nothing
            would be deleted but every TIME_ID would be shifted upward, leaving
            the view (repointed to TIME_ID=1) without a matching row.
    """
    if time_id is None:
        time_id = self.get_current_timeid()

    # Coerce once so the logged value and both SQL statements use the same integer.
    threshold = int(time_id)

    # Guard before touching the table: the renumbering below only maps an
    # existing id onto 1 when the threshold itself is a valid (>= 1) id.
    # NOTE(review): assumes TIME_ID values are 1-based, as implied by the
    # view being repointed to TIME_ID=1 after pruning - confirm against loader.
    if threshold < 1:
        raise ValueError(
            f"time_id must be >= 1 to prune time steps; got {threshold}."
        )

    logger.info("Pruning time steps", extra={"threshold_time_id": threshold})

    delete_sql = f"""
    DELETE {self.schema_name}.{self.table_name}
    WHERE TIME_ID < {threshold}
    """
    update_sql = f"""
    UPDATE {self.schema_name}.{self.table_name}
    SET TIME_ID = TIME_ID - {threshold} + 1
    """

    logger.debug("Executing prune delete", extra={"sql": delete_sql})
    tdml.execute_sql(delete_sql)

    logger.debug("Executing prune renumber", extra={"sql": update_sql})
    tdml.execute_sql(update_sql)

    # Refresh metadata and repoint view to TIME_ID=1
    self.update(1)
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    logger.info(
        "Prune complete; active TIME_ID set to 1; nb_time_steps=%s",
        self.nb_time_steps
    )
    return self
|