tdfs4ds 0.2.4.26__py3-none-any.whl → 0.2.4.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,32 +1,94 @@
1
1
  import teradataml as tdml
2
2
  import datetime
3
+ from tdfs4ds import logger, logger_safe
4
+ import re
3
5
 
4
6
  import tdfs4ds
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
 
8
- def get_hidden_table_name(table_name):
9
- return table_name + '_HIDDEN'
10
- class TimeManager:
10
+
11
def get_hidden_table_name(table_name: str) -> str:
    """Build the name of the hidden physical table backing a public view.

    Args:
        table_name: Name of the public view (or base table).

    Returns:
        ``table_name`` with the ``_HIDDEN`` suffix appended
        (e.g., ``"MY_VIEW"`` -> ``"MY_VIEW_HIDDEN"``).
    """
    suffix = "_HIDDEN"
    return "{}{}".format(table_name, suffix)
13
21
 
14
- Attributes:
15
- schema_name (str): Name of the schema in the database.
16
- table_name (str): Name of the table in the schema.
17
- data_type (str): Type of the date/time data, defaults to 'DATE'.
22
+
23
+ class TimeManager:
24
+ """
25
+ Manage versioned business time steps behind a Teradata-backed public view.
26
+
27
+ `TimeManager` stores a sequence of time “snapshots” in a hidden physical
28
+ table (`<view_name>_HIDDEN`) with two columns:
29
+ - `TIME_ID` (1..N): the step index, assigned deterministically via
30
+ `ROW_NUMBER()` over the input time column.
31
+ - `BUSINESS_DATE`: the business date/time associated with each step. When
32
+ loading, the SQL type is inferred and upcast to `TIMESTAMP WITH TIME ZONE`
33
+ when needed to preserve offsets.
34
+
35
+ A companion public view (`<view_name>`) always exposes the *current* business
36
+ date by filtering the hidden table on a single `TIME_ID`. Changing the
37
+ current step only rewrites the view definition—no data is mutated.
38
+
39
+ Key capabilities
40
+ - Load/replace the hidden table from a pandas DataFrame (`load_time_steps`).
41
+ - Switch the active time step by `TIME_ID` (`update`).
42
+ - Inspect the current date/time (`display`, `get_date_in_the_past`).
43
+ - Generate a timeline view up to (or strictly before) the current step
44
+ (`generate_timeline`).
45
+ - Prune older steps and renumber so the earliest remaining step becomes 1
46
+ (`prune_time`).
47
+ - Clone from another `TimeManager` (soft link or hard copy) and optionally
48
+ take ownership of the hidden table (`clone_timer`, `take_ownership`).
49
+ - Introspect the active step by parsing the view DDL (`get_current_timeid`,
50
+ `print_view_ddl`).
51
+
52
+ Workflow overview
53
+ 1) Instantiate `TimeManager` with a target `view_name` and `schema_name`.
54
+ 2) Call `load_time_steps(df, time_column)` to (re)create the hidden table and
55
+ point the public view at `TIME_ID = 1`.
56
+ 3) Use `update(time_id)` to switch the active business date.
57
+ 4) (Optional) Create derivative timeline views, prune older steps, or clone.
58
+
59
+ Parameters
60
+ table_name (str): Base public view name to manage (e.g., "MY_VIEW").
61
+ schema_name (str): Teradata schema/database that holds the artifacts.
62
+
63
+ Attributes
64
+ schema_name (str): Target schema for the view and hidden table.
65
+ table_name (str): Hidden table name (`<view_name>_HIDDEN`).
66
+ view_name (str): Public view name (`<view_name>`).
67
+ time_id (str): Name of the step identifier column (default: "time_id").
68
+ nb_time_steps (int | None): Number of steps detected after load/inspection.
69
+ data_type (str | None): SQL data type of `BUSINESS_DATE` (e.g., `DATE`,
70
+ `TIMESTAMP WITH TIME ZONE`), inferred during load/inspection.
71
+
72
+ Notes
73
+ - On initialization, if the hidden table already exists, metadata
74
+ (`data_type`, `nb_time_steps`) is auto-detected.
75
+ - `load_time_steps` will drop and recreate the hidden table to match the
76
+ inferred schema, then rebuild the public view.
77
+ - “Soft” cloning points this manager’s view at the source hidden table;
78
+ “hard” cloning copies the table into this schema and marks it owned.
79
+ - Ownership controls whether `_drop()` is allowed to remove the hidden
80
+ table (use `take_ownership` to promote ownership when appropriate).
18
81
  """
19
82
 
20
- def __init__(self, table_name, schema_name):
21
- """
22
- Initializes the TimeManager with a table name, schema name, and optionally a data type.
83
+ def __init__(self, table_name: str, schema_name: str) -> None:
84
+ """Initialize a TimeManager for an existing or future hidden table/view.
23
85
 
24
- If the table doesn't exist, it creates one with a BUSINESS_DATE column of the specified data type.
86
+ On initialization, if the hidden table already exists, the instance
87
+ inspects it to populate ``data_type`` and ``nb_time_steps``.
25
88
 
26
89
  Args:
27
- table_name (str): Name of the table.
28
- schema_name (str): Name of the schema.
29
- data_type (str, optional): Type of the date/time data. Defaults to 'DATE'.
90
+ table_name: Base public view name to manage (e.g., ``"MY_VIEW"``).
91
+ schema_name: Schema that contains/should contain the objects.
30
92
  """
31
93
  self.schema_name = schema_name
32
94
  self.table_name = get_hidden_table_name(table_name)
@@ -35,39 +97,52 @@ class TimeManager:
35
97
  self.nb_time_steps = None
36
98
  self.data_type = None
37
99
 
100
+ logger.debug(
101
+ "Initializing TimeManager for schema=%s, view=%s, table=%s",
102
+ self.schema_name, self.view_name, self.table_name
103
+ )
104
+
38
105
  if self._exists():
106
+ logger.debug("Hidden table %s.%s exists; inspecting metadata.", self.schema_name, self.table_name)
39
107
  df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
40
108
  d_ = {x[0]: x[1] for x in df._td_column_names_and_types}
41
- self.data_type = d_['BUSINESS_DATE']
42
- self.nb_time_steps = tdml.execute_sql(
43
- f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}").fetchall()[
44
- 0][0]
109
+ self.data_type = d_.get('BUSINESS_DATE')
110
+ self.nb_time_steps = tdml.execute_sql(
111
+ f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}"
112
+ ).fetchall()[0][0]
113
+ logger.info(
114
+ "Detected BUSINESS_DATE data_type=%s with nb_time_steps=%s",
115
+ self.data_type, self.nb_time_steps
116
+ )
45
117
 
46
- def load_time_steps(self, df, time_column):
47
- """
48
- Load time steps into the table and update the view accordingly.
118
+ def load_time_steps(self, df: pd.DataFrame, time_column: str) -> None:
119
+ """Load/replace the hidden table and (re)point the public view to step 1.
49
120
 
50
- This method:
51
- 1. Creates a new DataFrame with a sequential time_id and BUSINESS_DATE.
52
- 2. Ensures BUSINESS_DATE has the correct SQL data type.
53
- 3. Drops and recreates the target table with the appropriate schema.
54
- 4. Inserts the new data into the table.
55
- 5. Updates the view to reference the first time step.
56
- 6. Stores the number of time steps in `self.nb_time_steps`.
121
+ Workflow:
122
+ 1) Build a DataFrame with sequential ``TIME_ID`` and ``BUSINESS_DATE``.
123
+ 2) Infer SQL types and upcast to ``TIMESTAMP WITH TIME ZONE`` when
124
+ needed (to preserve offsets).
125
+ 3) Drop and recreate the hidden table with inferred schema.
126
+ 4) Append the rows.
127
+ 5) Replace the public view to expose ``TIME_ID = 1``.
128
+ 6) Store ``nb_time_steps``.
57
129
 
58
130
  Args:
59
- df (pd.DataFrame): The input DataFrame containing time data.
60
- time_column (str): The column name representing time.
131
+ df: Input pandas DataFrame with a time column.
132
+ time_column: Name of the time column in ``df`` to use as ``BUSINESS_DATE``.
61
133
  """
134
+ logger.info("Loading time steps into %s.%s from column '%s'.",
135
+ self.schema_name, self.table_name, time_column)
62
136
 
63
- # Step 1: Build DataFrame with time_id and BUSINESS_DATE
137
+ # Step 1: Build DataFrame with TIME_ID and BUSINESS_DATE
64
138
  df_ = df.assign(
65
139
  time_id=tdml.sqlalchemy.literal_column(
66
- f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
140
+ f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
67
141
  tdml.BIGINT()
68
142
  ),
69
143
  BUSINESS_DATE=df[time_column]
70
144
  )[["time_id", "BUSINESS_DATE"]]
145
+ logger.debug("Constructed intermediate DataFrame with TIME_ID and BUSINESS_DATE.")
71
146
 
72
147
  # Step 2: Get SQL types and adjust BUSINESS_DATE if necessary
73
148
  sql_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_)
@@ -75,9 +150,9 @@ class TimeManager:
75
150
 
76
151
  if "TIMESTAMP" in type_business_date.upper() and "ZONE" not in type_business_date.upper():
77
152
  new_type = f"{type_business_date} WITH TIME ZONE"
78
- print(
79
- f"Data type of the time column modified from {type_business_date} "
80
- f"to {new_type}"
153
+ logger.info(
154
+ "Upcasting BUSINESS_DATE from %s to %s to preserve timezone.",
155
+ type_business_date, new_type
81
156
  )
82
157
  type_business_date = new_type
83
158
  sql_types["BUSINESS_DATE"] = new_type
@@ -89,13 +164,20 @@ class TimeManager:
89
164
  )
90
165
 
91
166
  self.data_type = type_business_date
167
+ logger.debug("Final BUSINESS_DATE SQL type: %s", self.data_type)
92
168
 
93
169
  # Step 3: Drop table if it exists
94
170
  try:
95
171
  tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
172
+ logger.debug("Dropped existing table %s.%s (if existed).", self.schema_name, self.table_name)
96
173
  except Exception as e:
174
+ # Not fatal; the table might not exist. Log at debug when in dev, warning otherwise.
175
+ e_str = str(e).split('\n')[0]
176
+ msg = f"Error dropping table {self.schema_name}.{self.table_name}: {e_str}"
97
177
  if tdfs4ds.DEBUG_MODE:
98
- print(f"Error dropping table {self.schema_name}.{self.table_name}: {e}")
178
+ logger.debug(msg)
179
+ else:
180
+ logger.warning(msg)
99
181
 
100
182
  # Step 4: Recreate table
101
183
  ddl = ",\n".join([f"{col} {dtype}" for col, dtype in sql_types.items()])
@@ -106,6 +188,7 @@ class TimeManager:
106
188
  PRIMARY INDEX (time_id)
107
189
  """
108
190
  tdml.execute_sql(create_table_sql)
191
+ logger.info("Created table %s.%s with schema: %s", self.schema_name, self.table_name, sql_types)
109
192
 
110
193
  # Step 5: Insert data
111
194
  df_[list(sql_types.keys())].to_sql(
@@ -113,6 +196,7 @@ class TimeManager:
113
196
  schema_name=self.schema_name,
114
197
  if_exists="append"
115
198
  )
199
+ logger.info("Inserted %s time steps into %s.%s.", df_.shape[0], self.schema_name, self.table_name)
116
200
 
117
201
  # Step 6: Update view
118
202
  create_view_sql = f"""
@@ -122,40 +206,72 @@ class TimeManager:
122
206
  WHERE time_id = 1
123
207
  """
124
208
  tdml.execute_sql(create_view_sql)
209
+ logger.debug("Replaced view %s.%s to point at TIME_ID=1.", self.schema_name, self.view_name)
125
210
 
126
211
  # Step 7: Store number of time steps
127
212
  result = tdml.execute_sql(
128
213
  f"SELECT MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}"
129
214
  ).fetchall()
130
215
  self.nb_time_steps = result[0][0]
216
+ logger.info("Time steps loaded. nb_time_steps=%s", self.nb_time_steps)
131
217
 
132
-
133
- def _exists(self):
134
- """
135
- Checks if the table exists in the database.
218
def _exists(self) -> bool:
    """Report whether the hidden table is listed in ``self.schema_name``.

    Names returned by ``tdml.db_list_tables`` are compared with
    ``self.table_name`` case-insensitively, after removing double quotes
    from the listed names.

    Returns:
        True when a matching table name is found; False otherwise.
    """
    listed_names = tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    exists = any(
        name.lower().replace('"', '') == self.table_name.lower()
        for name in listed_names
    )
    logger.debug("Hidden table %s.%s exists? %s", self.schema_name, self.table_name, exists)
    return exists
140
230
 
141
- return len([x for x in tdml.db_list_tables(schema_name=self.schema_name).TableName.values if
142
- x.lower().replace('"', '') == self.table_name.lower()]) > 0
231
def _drop(self, drop_view: bool = False, force: bool = False) -> None:
    """Remove the hidden table (when permitted) and, optionally, the public view.

    Args:
        drop_view: When True, a ``DROP VIEW`` is also issued for the public view.
        force: When True, the hidden table is dropped even if this manager
            is not flagged as its owner.

    Notes:
        - The hidden table is dropped only when it exists and either
          ``self._owns_hidden`` is truthy or ``force`` is True. A missing
          ``_owns_hidden`` attribute is treated as "not owned".
        - The view drop does not depend on ownership. A failure while
          dropping the view is logged as a warning and not re-raised.
    """
    # --- hidden table -----------------------------------------------------
    if not self._exists():
        logger.debug("Hidden table %s.%s does not exist.", self.schema_name, self.table_name)
    elif getattr(self, "_owns_hidden", False) or force:
        logger.info(
            "Dropping hidden table %s.%s (force=%s).",
            self.schema_name, self.table_name, force
        )
        tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
    else:
        logger.warning(
            "Refusing to drop hidden table %s.%s because this manager does not own it. "
            "Use force=True to override.",
            self.schema_name, self.table_name
        )

    # --- public view (opt-in) ---------------------------------------------
    if not drop_view:
        return
    try:
        logger.info("Dropping view %s.%s.", self.schema_name, self.view_name)
        tdml.execute_sql(f"DROP VIEW {self.schema_name}.{self.view_name}")
    except Exception as e:
        logger.warning("Error dropping view %s.%s: %s", self.schema_name, self.view_name, e)
152
268
 
153
- def update(self, time_id):
154
- """
155
- Updates the view to apply a new filter based on the provided filter ID.
269
+
270
+ def update(self, time_id: int) -> None:
271
+ """Point the public view at a specific ``TIME_ID``.
156
272
 
157
273
  Args:
158
- filter_id (int): The ID of the filter to apply. The view will be updated to only show data that matches this filter ID.
274
+ time_id: The time step identifier to expose via the public view.
159
275
  """
160
276
  if self._exists():
161
277
  query = f"""
@@ -164,32 +280,44 @@ class TimeManager:
164
280
  FROM {self.schema_name}.{self.table_name}
165
281
  WHERE TIME_ID = {time_id}
166
282
  """
283
+ if getattr(tdfs4ds, "DEBUG_MODE", False):
284
+ logger_safe("debug", "Executing view update:\n%s", query)
167
285
 
168
- if tdfs4ds.DEBUG_MODE:
169
- print(query)
170
286
  tdml.execute_sql(query)
287
+ logger_safe("info", "Updated view %s.%s to TIME_ID=%s.", self.schema_name, self.view_name, time_id)
171
288
 
172
- def display(self):
173
- """
174
- Displays the table.
289
+ else:
290
+ logger_safe(
291
+ "warning",
292
+ "Cannot update view: hidden table %s.%s does not exist.",
293
+ self.schema_name, self.table_name
294
+ )
295
+
296
def display(self) -> pd.DataFrame:
    """Fetch the rows currently exposed by the public view.

    The column names are taken from a Teradata DataFrame bound to the view,
    and the rows from a ``SEL *`` executed against the same view.

    Returns:
        A pandas DataFrame holding the view's rows (the current
        ``BUSINESS_DATE``).
    """
    logger.debug("Reading current BUSINESS_DATE from %s.%s.", self.schema_name, self.view_name)
    view_columns = tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name)).columns
    cursor = tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}")
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=view_columns)
182
308
 
183
- def get_date_in_the_past(self):
184
- """
185
- Retrieves the earliest date and time value from the table.
309
+ def get_date_in_the_past(self) -> str | None:
310
+ """Return the earliest BUSINESS_DATE from the public view as a string.
311
+
312
+ The format includes timezone offset when applicable:
313
+ - ``YYYY-MM-DD HH:MM:SS±HH:MM`` if timezone info is present;
314
+ - otherwise ``YYYY-MM-DD HH:MM:SS``.
186
315
 
187
316
  Returns:
188
- str: The earliest date and time value as a formatted string
189
- ('YYYY-MM-DD HH:MM:SS±HH:MM' if timezone is available, else 'YYYY-MM-DD HH:MM:SS').
317
+ The formatted earliest date/time string, or ``None`` if parsing fails.
190
318
  """
191
- # Use iloc to preserve timezone awareness from pandas
192
319
  date_obj = self.display().BUSINESS_DATE.iloc[0]
320
+ logger.debug("Raw earliest BUSINESS_DATE value read: %r (%s)", date_obj, type(date_obj))
193
321
 
194
322
  if isinstance(date_obj, pd.Timestamp):
195
323
  datetime_obj = date_obj.to_pydatetime()
@@ -200,68 +328,390 @@ class TimeManager:
200
328
  elif isinstance(date_obj, np.datetime64):
201
329
  datetime_obj = pd.to_datetime(date_obj).to_pydatetime()
202
330
  else:
203
- print("temp is of unrecognized type")
204
- print('temp', date_obj)
205
- print('temp type', type(date_obj))
206
- return
331
+ logger.warning("Unrecognized BUSINESS_DATE type: %s; value=%r", type(date_obj), date_obj)
332
+ return None
207
333
 
208
- # Format with timezone offset if available
209
334
  if datetime_obj.tzinfo is not None and datetime_obj.tzinfo.utcoffset(datetime_obj) is not None:
210
335
  output_string = datetime_obj.isoformat(sep=' ', timespec='seconds')
211
336
  else:
212
337
  output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
213
338
 
339
+ logger.debug("Formatted earliest BUSINESS_DATE: %s", output_string)
214
340
  return output_string
215
341
 
216
- def get_list_date(self):
217
- """
218
- Retrieve a list of dates from the specified table.
219
-
220
- This function returns a DataFrame containing the dates from the table specified by the schema_name and table_name attributes of the class.
342
def get_list_date(self) -> tdml.DataFrame:
    """Expose the hidden table (all time steps) as a Teradata DataFrame.

    Returns:
        A Teradata DataFrame bound to ``self.schema_name``.``self.table_name``,
        i.e. the hidden table holding ``TIME_ID`` and ``BUSINESS_DATE``.
    """
    logger.debug("Returning Teradata DataFrame for %s.%s.", self.schema_name, self.table_name)
    hidden_table = tdml.in_schema(self.schema_name, self.table_name)
    return tdml.DataFrame(hidden_table)
226
351
 
227
- def generate_timeline(self, schema_name, view_name, current_included=True):
228
- """
229
- Generate a timeline view based on business dates from a hidden source view.
352
def generate_timeline(self, schema_name: str, view_name: str, current_included: bool = True) -> tdml.DataFrame:
    """Replace a timeline view bounded by the current business date.

    The view ``schema_name.view_name`` selects ``BUSINESS_DATE`` from
    ``self.schema_name.<self.view_name>_HIDDEN`` and keeps the dates that
    are at or before (``current_included=True``) or strictly before
    (``current_included=False``) the date exposed by the public view
    ``self.schema_name.self.view_name``.

    Note that the source table name is derived from ``self.view_name``,
    not read from ``self.table_name``.

    Args:
        schema_name: Schema in which the timeline view is (re)created.
        view_name: Name of the timeline view.
        current_included: Whether the current business date is part of
            the timeline.

    Returns:
        A Teradata DataFrame bound to the replaced timeline view.
    """
    logger.info(
        "Generating timeline view %s.%s (current_included=%s).",
        schema_name, view_name, current_included
    )
    # Inclusive vs. strict bound is the only difference between both variants.
    comparator = "<=" if current_included else "<"
    query = f"""
    REPLACE VIEW {schema_name}.{view_name} AS
    SEL BUSINESS_DATE
    FROM {self.schema_name}.{self.view_name + '_HIDDEN'} A
    """
    query += (
        f"WHERE BUSINESS_DATE {comparator} "
        f"(SELECT BUSINESS_DATE FROM {self.schema_name}.{self.view_name})"
    )

    tdml.execute_sql(query)
    logger.debug("Replaced timeline view with query:\n%s", query.strip())
    timeline_view = tdml.in_schema(schema_name, view_name)
    return tdml.DataFrame(timeline_view)
259
385
 
260
- def get_current_step(self):
386
def get_current_step(self) -> int | None:
    """Return the TIME_ID whose BUSINESS_DATE matches the public view.

    The hidden table and the public view are both qualified with
    ``self.schema_name`` so the lookup does not depend on the session's
    default database, in line with the other queries of this class.

    Returns:
        The ``TIME_ID`` when exactly one row of the hidden table matches the
        ``BUSINESS_DATE`` exposed by the view; otherwise ``None`` (no match,
        or several time steps sharing the same date).
    """
    hidden_table = f"{self.schema_name}.{self.table_name}"
    public_view = f"{self.schema_name}.{self.view_name}"
    logger.debug("Fetching current TIME_ID from %s and %s.", hidden_table, public_view)
    res = tdml.execute_sql(
        f"SELECT TIME_ID FROM {hidden_table} "
        f"WHERE BUSINESS_DATE = (SELECT BUSINESS_DATE FROM {public_view})"
    ).fetchall()

    if len(res) == 1:
        logger.info("Current TIME_ID resolved to %s.", res[0][0])
        return res[0][0]

    logger.warning("Could not resolve a unique current TIME_ID (rows returned: %s).", len(res))
    return None
405
+
406
def clone_timer(
    self,
    source_timemanager,
    time_id_to_apply: int = 1,
    take_ownership: bool = False,
    clone_mode: str = "soft",
    if_exists: str = "error",
) -> "TimeManager":
    """
    Clone time-step definitions from another TimeManager.

    Supports:
    - soft clone (default): point this manager's view to the source _HIDDEN table
    - hard clone: copy the source _HIDDEN table into this schema and own the copy

    Args:
        source_timemanager (TimeManager): Source manager to clone from.
        time_id_to_apply (int, optional): TIME_ID to activate in the public view.
            It is converted with ``int()`` before any database object is
            touched. Default: 1.
        take_ownership (bool, optional): For soft clones only, whether this
            manager should consider itself the owner of the hidden table.
            (Hard clones always own their copy.) Default: False.
        clone_mode (str, optional): "soft" or "hard". Default: "soft".
        if_exists (str, optional): What to do if the destination hidden table
            already exists (hard clone only)
            - "error" (default): raise an exception
            - "replace": drop and recreate
            - "skip": reuse existing table

    Returns:
        TimeManager: self

    Raises:
        ValueError: On invalid clone_mode/if_exists, a non-numeric
            ``time_id_to_apply`` string, or missing source table.
        TypeError: If ``time_id_to_apply`` cannot be converted by ``int()``
            (e.g. ``None``).
        RuntimeError: If destination exists and if_exists="error".
    """
    if clone_mode not in ("soft", "hard"):
        raise ValueError("clone_mode must be 'soft' or 'hard'")
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    # Coerce up front so an unusable TIME_ID fails before any table is
    # dropped or created, instead of after the clone has been materialized.
    active_time_id = int(time_id_to_apply)

    def _listed_tables(schema: str) -> set:
        """Lower-cased, unquoted table names of ``schema`` (same matching as ``_exists``)."""
        return {
            name.lower().replace('"', '')
            for name in tdml.db_list_tables(schema_name=schema).TableName.values
        }

    src_schema = source_timemanager.schema_name
    src_hidden = source_timemanager.table_name

    logger.info(
        "Cloning timer",
        extra={
            "mode": clone_mode,
            "source": f"{src_schema}.{src_hidden}",
            "target_view": f"{self.schema_name}.{self.view_name}",
        },
    )

    # Validate source existence
    if src_hidden.lower() not in _listed_tables(src_schema):
        raise ValueError(f"Source hidden timer table {src_schema}.{src_hidden} does not exist.")

    if clone_mode == "hard":
        # Hard clone -> create (or reuse) a NEW hidden table in this schema
        self.table_name = get_hidden_table_name(self.view_name)
        need_create = True

        if self.table_name.lower() in _listed_tables(self.schema_name):
            if if_exists == "error":
                raise RuntimeError(f"Target table {self.schema_name}.{self.table_name} already exists.")
            if if_exists == "replace":
                logger.warning("Replacing existing table %s.%s", self.schema_name, self.table_name)
                tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
            else:  # "skip": keep the table that is already there
                logger.info("Skipping clone, using existing %s.%s", self.schema_name, self.table_name)
                need_create = False

        if need_create:
            logger.info("Creating cloned table %s.%s", self.schema_name, self.table_name)
            create_sql = f"""
            CREATE TABLE {self.schema_name}.{self.table_name} AS
            (SELECT * FROM {src_schema}.{src_hidden})
            WITH DATA
            """
            tdml.execute_sql(create_sql)

        self._owns_hidden = True
        target_schema = self.schema_name

    else:
        # Soft clone -> just point to the source hidden table.
        # NOTE(review): only the table name is stored; methods that build
        # `{self.schema_name}.{self.table_name}` will address this manager's
        # schema, not the source schema, when the two differ -- confirm
        # whether cross-schema soft clones are expected to support them.
        logger.info("Soft clone: linking view to source hidden table")
        self.table_name = src_hidden
        self._owns_hidden = bool(take_ownership)
        target_schema = src_schema  # view will select from the source schema

    # Load metadata from the target hidden table
    df_meta = tdml.DataFrame(tdml.in_schema(target_schema, self.table_name))
    # Get data type for BUSINESS_DATE (if present)
    try:
        dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
        self.data_type = dtypes.get("BUSINESS_DATE")
    except Exception:
        # Metadata is informational only; the clone itself is still valid.
        self.data_type = None

    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {target_schema}.{self.table_name}"
    ).fetchall()[0][0]

    # Rebuild the public view to the requested TIME_ID
    view_sql = f"""
    REPLACE VIEW {self.schema_name}.{self.view_name} AS
    SELECT BUSINESS_DATE
    FROM {target_schema}.{self.table_name}
    WHERE TIME_ID = {active_time_id}
    """
    tdml.execute_sql(view_sql)

    logger.info(
        "Timer clone complete → Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        active_time_id, self.nb_time_steps, self.data_type
    )
    return self
522
+
523
def take_ownership(
    self,
    create_copy: bool = True,
    if_exists: str = "error",
) -> "TimeManager":
    """Promote this manager to OWN the hidden table.

    Two modes:
    - create_copy=True (default): Hard-promote by copying the current source
      hidden table into this manager's schema as <view>_HIDDEN, repoint the
      view, and set ownership.
    - create_copy=False: Only mark as owned if the current hidden table is
      already named <view>_HIDDEN. Otherwise, warn and change nothing.

    The copy is read from ``self.schema_name``.``self.table_name``. When that
    is already the destination table, no drop or copy is performed for
    ``if_exists="replace"``: dropping it first would destroy the only copy
    of the data.

    Args:
        create_copy: If True, copy data into this schema and repoint view.
        if_exists: Behavior when the destination <view>_HIDDEN already exists:
            - "error" (default): raise
            - "replace": drop & recreate (kept in place when the destination
              is the table currently in use)
            - "skip": reuse existing

    Returns:
        TimeManager: self

    Raises:
        ValueError: If ``if_exists`` is not one of the accepted values.
        RuntimeError: If the destination exists and ``if_exists="error"``.
    """
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    # Figure out current active TIME_ID to preserve selection after repointing.
    # Best effort: any failure falls back to the first step.
    try:
        current_time_id = self.get_current_step()
    except Exception:
        current_time_id = None
    if current_time_id is None:
        current_time_id = 1

    dest_table = get_hidden_table_name(self.view_name)

    if not create_copy:
        # Only mark as owned if we already match <schema, view_HIDDEN>
        if self.schema_name and self.table_name == dest_table:
            logger.info(
                "Marking existing hidden table %s.%s as owned (no copy).",
                self.schema_name, self.table_name
            )
            self._owns_hidden = True
            return self
        logger.warning(
            "Cannot take ownership without copying: current table is %s (expected %s). "
            "Re-run with create_copy=True to copy into %s.%s.",
            self.table_name, dest_table, self.schema_name, dest_table
        )
        return self

    # We will copy data into <self.schema_name>.<view>_HIDDEN
    dest_exists = {
        t.lower().replace('"', '')
        for t in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    }
    # Source and destination are both qualified with self.schema_name, so
    # equal names (case-insensitively) mean they are the very same table.
    source_is_dest = self.table_name.lower() == dest_table.lower()
    need_create = True

    if dest_table.lower() in dest_exists:
        if if_exists == "error":
            raise RuntimeError(f"Destination table {self.schema_name}.{dest_table} already exists.")
        if if_exists == "replace" and source_is_dest:
            # Dropping here would delete the data we are about to copy.
            logger.warning(
                "Destination %s.%s is the table currently in use; keeping it in place.",
                self.schema_name, dest_table
            )
            need_create = False
        elif if_exists == "replace":
            logger.warning("Replacing existing table %s.%s", self.schema_name, dest_table)
            tdml.db_drop_table(schema_name=self.schema_name, table_name=dest_table)
        else:  # "skip"
            logger.info("Reusing existing destination table %s.%s", self.schema_name, dest_table)
            need_create = False

    if need_create:
        logger.info(
            "Creating owned copy %s.%s from current source %s.%s",
            self.schema_name, dest_table, self.schema_name, self.table_name
        )
        create_sql = f"""
        CREATE TABLE {self.schema_name}.{dest_table} AS
        (SELECT * FROM {self.schema_name}.{self.table_name})
        WITH DATA
        """
        tdml.execute_sql(create_sql)

    # Repoint this manager to the new owned table and rebuild the view
    self.table_name = dest_table
    self._owns_hidden = True

    view_sql = f"""
    REPLACE VIEW {self.schema_name}.{self.view_name} AS
    SELECT BUSINESS_DATE
    FROM {self.schema_name}.{self.table_name}
    WHERE TIME_ID = {int(current_time_id)}
    """
    tdml.execute_sql(view_sql)

    # Refresh metadata; the data type is informational, so a failure here
    # is reported but does not undo the ownership change.
    try:
        df_meta = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
        dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
        self.data_type = dtypes.get("BUSINESS_DATE")
    except Exception as err:
        logger.debug("Could not refresh BUSINESS_DATE data type: %s", err)
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    logger.info(
        "Ownership taken for %s.%s. Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        self.schema_name, self.table_name, current_time_id, self.nb_time_steps, self.data_type
    )
    return self
641
+
642
def get_current_timeid(self) -> int:
    """
    Extract the currently active TIME_ID from the public view's DDL.

    The DDL text is obtained from ``tdfs4ds.utils.lineage.get_ddl`` and
    searched for ``WHERE TIME_ID = <digits>`` (case-insensitive, flexible
    whitespace).

    Returns:
        int: TIME_ID parsed from the view definition.

    Raises:
        ValueError: If the TIME_ID cannot be parsed from the DDL.
    """
    logger.debug("Reading view DDL to extract current TIME_ID")
    txt = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)

    # Look for "WHERE TIME_ID = <number>" (allow whitespace/case variations)
    m = re.search(r"WHERE\s+TIME_ID\s*=\s*(\d+)", txt, flags=re.IGNORECASE)
    if not m:
        # No exception is being handled here, so use error() rather than
        # exception(), which would append a meaningless "NoneType: None".
        logger.error("Failed to parse TIME_ID from view DDL")
        raise ValueError("Unable to parse current TIME_ID from view DDL.")
    current = int(m.group(1))
    logger.info("Current TIME_ID extracted", extra={"time_id": current})
    return current
663
+
664
+
665
def print_view_ddl(self) -> None:
    """
    Emit the public view's DDL through the module logger at INFO level.

    The DDL is fetched with ``tdfs4ds.utils.lineage.get_ddl``; nothing is
    returned or written to stdout.
    """
    logger.info(
        "View DDL:\n%s",
        tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name),
    )
671
+
672
+
673
def prune_time(self, time_id: int | None = None) -> "TimeManager":
    """
    Remove all time steps with TIME_ID lower than `time_id` and renumber remaining ones.

    If `time_id` is omitted, the method uses the current TIME_ID from the view.
    After pruning, TIME_ID values are shifted by the threshold so that the
    step at `time_id` becomes 1, and the public view is repointed to TIME_ID=1.

    A threshold below 1 is treated as 1: nothing can be deleted in that case,
    and shifting by such a value would move the existing ids upward instead
    of keeping the first one at 1.

    Args:
        time_id (int, optional): Threshold id; rows with TIME_ID < time_id are deleted.

    Returns:
        TimeManager: Self, to allow method chaining.

    Raises:
        ValueError: If the threshold is greater than the highest TIME_ID in
            the hidden table; pruning would delete every time step and leave
            the public view without a row.
    """
    if time_id is None:
        time_id = self.get_current_timeid()

    threshold = max(int(time_id), 1)
    qualified_table = f"{self.schema_name}.{self.table_name}"

    # Check the upper bound before mutating anything.
    highest_id = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {qualified_table}"
    ).fetchall()[0][0]
    if highest_id is not None and threshold > highest_id:
        raise ValueError(
            f"Cannot prune below TIME_ID {threshold}: the highest TIME_ID in "
            f"{qualified_table} is {highest_id}, so no time step would remain."
        )

    logger.info("Pruning time steps", extra={"threshold_time_id": threshold})

    if threshold > 1:
        delete_sql = f"""
        DELETE {qualified_table}
        WHERE TIME_ID < {threshold}
        """
        update_sql = f"""
        UPDATE {qualified_table}
        SET TIME_ID = TIME_ID - {threshold} + 1
        """

        logger.debug("Executing prune delete", extra={"sql": delete_sql})
        tdml.execute_sql(delete_sql)

        logger.debug("Executing prune renumber", extra={"sql": update_sql})
        tdml.execute_sql(update_sql)
    else:
        # Threshold 1 deletes nothing and renumbers to the same ids.
        logger.debug("Prune threshold is 1; no rows to delete or renumber.")

    # Refresh metadata and repoint view to TIME_ID=1
    self.update(1)
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {qualified_table}"
    ).fetchall()[0][0]

    logger.info(
        "Prune complete; active TIME_ID set to 1; nb_time_steps=%s",
        self.nb_time_steps
    )
    return self