tdfs4ds 0.2.4.26__py3-none-any.whl → 0.2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,73 +1,165 @@
1
1
  import teradataml as tdml
2
2
  import datetime
3
+ from tdfs4ds import logger, logger_safe
4
+ import re
3
5
 
4
6
  import tdfs4ds
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
 
8
def get_hidden_table_name(schema_name, view_name):
    """Resolve the name of the hidden table that backs a public time view.

    The DDL of ``schema_name.view_name`` is fetched through
    ``tdfs4ds.utils.lineage.get_ddl`` and the table referenced by its first
    ``FROM <schema>.<table>`` clause is returned. When the DDL cannot be
    fetched or parsed, the conventional name ``<view_name>_HIDDEN`` is
    returned instead.

    Args:
        schema_name (str): Schema that holds the public view.
        view_name (str): Name of the public view.

    Returns:
        str: Unqualified name of the backing hidden table.
    """
    try:
        ddl = tdfs4ds.utils.lineage.get_ddl(schema_name=schema_name, view_name=view_name)

        # Locate the FROM clause explicitly instead of relying on its line
        # position: this tolerates trailing newlines, table aliases and
        # double-quoted identifiers.
        match = re.search(
            r'\bFROM\s+"?[^\s."]+"?\s*\.\s*"?([^\s."();,]+)"?',
            ddl,
            flags=re.IGNORECASE,
        )
        if match:
            return match.group(1)

        # Legacy positional parsing, kept as a fallback for DDL layouts the
        # pattern above does not recognise.
        backing = ddl.split("\n")[-2].split(".")[1].strip()
        if not backing:
            raise ValueError("empty backing table name parsed from view DDL")
        return backing

    except Exception as e:
        msg = str(e)

        # Teradata "object does not exist" -> normal for first-time setup
        if "Error 3807" in msg or "does not exist" in msg.lower():
            logger_safe(
                "info",
                "View %s.%s not found; using default hidden table name %s_HIDDEN.",
                schema_name, view_name, view_name
            )
        else:
            # An exception with an empty message yields no lines; fall back to
            # the exception class name so the handler itself cannot fail.
            first_line = msg.splitlines()[0] if msg else type(e).__name__
            logger_safe(
                "warning",
                "Failed to extract hidden table name from DDL; defaulting to suffix method. Error: %s",
                first_line
            )

        return view_name + "_HIDDEN"
37
+
38
+
39
+
10
40
  class TimeManager:
11
41
  """
12
- A class to manage time-related operations in a database table.
13
-
14
- Attributes:
15
- schema_name (str): Name of the schema in the database.
16
- table_name (str): Name of the table in the schema.
17
- data_type (str): Type of the date/time data, defaults to 'DATE'.
42
+ Manage versioned business time steps behind a Teradata-backed public view.
43
+
44
+ `TimeManager` stores a sequence of time “snapshots” in a hidden physical
45
+ table (`<view_name>_HIDDEN`) with two columns:
46
+ - `TIME_ID` (1..N): the step index, assigned deterministically via
47
+ `ROW_NUMBER()` over the input time column.
48
+ - `BUSINESS_DATE`: the business date/time associated with each step. When
49
+ loading, the SQL type is inferred and upcast to `TIMESTAMP WITH TIME ZONE`
50
+ when needed to preserve offsets.
51
+
52
+ A companion public view (`<view_name>`) always exposes the *current* business
53
+ date by filtering the hidden table on a single `TIME_ID`. Changing the
54
+ current step only rewrites the view definition—no data is mutated.
55
+
56
+ Key capabilities
57
+ - Load/replace the hidden table from a pandas DataFrame (`load_time_steps`).
58
+ - Switch the active time step by `TIME_ID` (`update`).
59
+ - Inspect the current date/time (`display`, `get_date_in_the_past`).
60
+ - Generate a timeline view up to (or strictly before) the current step
61
+ (`generate_timeline`).
62
+ - Prune older steps and renumber so the earliest remaining step becomes 1
63
+ (`prune_time`).
64
+ - Clone from another `TimeManager` (soft link or hard copy) and optionally
65
+ take ownership of the hidden table (`clone_timer`, `take_ownership`).
66
+ - Introspect the active step by parsing the view DDL (`get_current_timeid`,
67
+ `print_view_ddl`).
68
+
69
+ Workflow overview
70
+ 1) Instantiate `TimeManager` with a target `view_name` and `schema_name`.
71
+ 2) Call `load_time_steps(df, time_column)` to (re)create the hidden table and
72
+ point the public view at `TIME_ID = 1`.
73
+ 3) Use `update(time_id)` to switch the active business date.
74
+ 4) (Optional) Create derivative timeline views, prune older steps, or clone.
75
+
76
+ Parameters
77
+ table_name (str): Base public view name to manage (e.g., "MY_VIEW").
78
+ schema_name (str): Teradata schema/database that holds the artifacts.
79
+
80
+ Attributes
81
+ schema_name (str): Target schema for the view and hidden table.
82
+ table_name (str): Hidden table name (`<view_name>_HIDDEN`).
83
+ view_name (str): Public view name (`<view_name>`).
84
+ time_id (str): Name of the step identifier column (default: "time_id").
85
+ nb_time_steps (int | None): Number of steps detected after load/inspection.
86
+ data_type (str | None): SQL data type of `BUSINESS_DATE` (e.g., `DATE`,
87
+ `TIMESTAMP WITH TIME ZONE`), inferred during load/inspection.
88
+
89
+ Notes
90
+ - On initialization, if the hidden table already exists, metadata
91
+ (`data_type`, `nb_time_steps`) is auto-detected.
92
+ - `load_time_steps` will drop and recreate the hidden table to match the
93
+ inferred schema, then rebuild the public view.
94
+ - “Soft” cloning points this manager’s view at the source hidden table;
95
+ “hard” cloning copies the table into this schema and marks it owned.
96
+ - Ownership controls whether `_drop()` is allowed to remove the hidden
97
+ table (use `take_ownership` to promote ownership when appropriate).
18
98
  """
19
99
 
20
def __init__(self, table_name: str, schema_name: str) -> None:
    """Initialize a TimeManager for an existing or future hidden table/view.

    The hidden table name is resolved with ``get_hidden_table_name``. If that
    table is already listed in ``schema_name``, it is inspected to populate
    ``data_type`` and ``nb_time_steps``; otherwise both stay ``None``.

    Args:
        table_name: Base public view name to manage (e.g., ``"MY_VIEW"``).
        schema_name: Schema that contains/should contain the objects.
    """
    self.schema_name = schema_name
    self.table_name = get_hidden_table_name(schema_name=schema_name, view_name=table_name)
    self.view_name = table_name
    self.time_id = 'time_id'
    self.nb_time_steps = None
    self.data_type = None
    # Ownership flag consulted by _drop(); declared here so the attribute
    # always exists. False matches the default _drop() assumes when unset.
    self._owns_hidden = False

    logger.debug(
        "Initializing TimeManager for schema=%s, view=%s, table=%s",
        self.schema_name, self.view_name, self.table_name
    )

    if self._exists():
        logger.debug("Hidden table %s.%s exists; inspecting metadata.", self.schema_name, self.table_name)
        df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
        d_ = {x[0]: x[1] for x in df._td_column_names_and_types}
        # .get() leaves data_type as None when BUSINESS_DATE is absent.
        self.data_type = d_.get('BUSINESS_DATE')
        self.nb_time_steps = tdml.execute_sql(
            f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}"
        ).fetchall()[0][0]
        logger.info(
            "Detected BUSINESS_DATE data_type=%s with nb_time_steps=%s",
            self.data_type, self.nb_time_steps
        )
45
134
 
46
- def load_time_steps(self, df, time_column):
47
- """
48
- Load time steps into the table and update the view accordingly.
135
+ def load_time_steps(self, df: pd.DataFrame, time_column: str) -> None:
136
+ """Load/replace the hidden table and (re)point the public view to step 1.
49
137
 
50
- This method:
51
- 1. Creates a new DataFrame with a sequential time_id and BUSINESS_DATE.
52
- 2. Ensures BUSINESS_DATE has the correct SQL data type.
53
- 3. Drops and recreates the target table with the appropriate schema.
54
- 4. Inserts the new data into the table.
55
- 5. Updates the view to reference the first time step.
56
- 6. Stores the number of time steps in `self.nb_time_steps`.
138
+ Workflow:
139
+ 1) Build a DataFrame with sequential ``TIME_ID`` and ``BUSINESS_DATE``.
140
+ 2) Infer SQL types and upcast to ``TIMESTAMP WITH TIME ZONE`` when
141
+ needed (to preserve offsets).
142
+ 3) Drop and recreate the hidden table with inferred schema.
143
+ 4) Append the rows.
144
+ 5) Replace the public view to expose ``TIME_ID = 1``.
145
+ 6) Store ``nb_time_steps``.
57
146
 
58
147
  Args:
59
- df (pd.DataFrame): The input DataFrame containing time data.
60
- time_column (str): The column name representing time.
148
+ df: Input pandas DataFrame with a time column.
149
+ time_column: Name of the time column in ``df`` to use as ``BUSINESS_DATE``.
61
150
  """
151
+ logger.info("Loading time steps into %s.%s from column '%s'.",
152
+ self.schema_name, self.table_name, time_column)
62
153
 
63
- # Step 1: Build DataFrame with time_id and BUSINESS_DATE
154
+ # Step 1: Build DataFrame with TIME_ID and BUSINESS_DATE
64
155
  df_ = df.assign(
65
156
  time_id=tdml.sqlalchemy.literal_column(
66
- f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
157
+ f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
67
158
  tdml.BIGINT()
68
159
  ),
69
160
  BUSINESS_DATE=df[time_column]
70
161
  )[["time_id", "BUSINESS_DATE"]]
162
+ logger.debug("Constructed intermediate DataFrame with TIME_ID and BUSINESS_DATE.")
71
163
 
72
164
  # Step 2: Get SQL types and adjust BUSINESS_DATE if necessary
73
165
  sql_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_)
@@ -75,9 +167,9 @@ class TimeManager:
75
167
 
76
168
  if "TIMESTAMP" in type_business_date.upper() and "ZONE" not in type_business_date.upper():
77
169
  new_type = f"{type_business_date} WITH TIME ZONE"
78
- print(
79
- f"Data type of the time column modified from {type_business_date} "
80
- f"to {new_type}"
170
+ logger.info(
171
+ "Upcasting BUSINESS_DATE from %s to %s to preserve timezone.",
172
+ type_business_date, new_type
81
173
  )
82
174
  type_business_date = new_type
83
175
  sql_types["BUSINESS_DATE"] = new_type
@@ -89,13 +181,20 @@ class TimeManager:
89
181
  )
90
182
 
91
183
  self.data_type = type_business_date
184
+ logger.debug("Final BUSINESS_DATE SQL type: %s", self.data_type)
92
185
 
93
186
  # Step 3: Drop table if it exists
94
187
  try:
95
188
  tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
189
+ logger.debug("Dropped existing table %s.%s (if existed).", self.schema_name, self.table_name)
96
190
  except Exception as e:
191
+ # Not fatal; the table might not exist. Log at debug when in dev, warning otherwise.
192
+ e_str = str(e).split('\n')[0]
193
+ msg = f"Error dropping table {self.schema_name}.{self.table_name}: {e_str}"
97
194
  if tdfs4ds.DEBUG_MODE:
98
- print(f"Error dropping table {self.schema_name}.{self.table_name}: {e}")
195
+ logger.debug(msg)
196
+ else:
197
+ logger.warning(msg)
99
198
 
100
199
  # Step 4: Recreate table
101
200
  ddl = ",\n".join([f"{col} {dtype}" for col, dtype in sql_types.items()])
@@ -106,6 +205,7 @@ class TimeManager:
106
205
  PRIMARY INDEX (time_id)
107
206
  """
108
207
  tdml.execute_sql(create_table_sql)
208
+ logger.info("Created table %s.%s with schema: %s", self.schema_name, self.table_name, sql_types)
109
209
 
110
210
  # Step 5: Insert data
111
211
  df_[list(sql_types.keys())].to_sql(
@@ -113,6 +213,7 @@ class TimeManager:
113
213
  schema_name=self.schema_name,
114
214
  if_exists="append"
115
215
  )
216
+ logger.info("Inserted %s time steps into %s.%s.", df_.shape[0], self.schema_name, self.table_name)
116
217
 
117
218
  # Step 6: Update view
118
219
  create_view_sql = f"""
@@ -122,40 +223,72 @@ class TimeManager:
122
223
  WHERE time_id = 1
123
224
  """
124
225
  tdml.execute_sql(create_view_sql)
226
+ logger.debug("Replaced view %s.%s to point at TIME_ID=1.", self.schema_name, self.view_name)
125
227
 
126
228
  # Step 7: Store number of time steps
127
229
  result = tdml.execute_sql(
128
230
  f"SELECT MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}"
129
231
  ).fetchall()
130
232
  self.nb_time_steps = result[0][0]
233
+ logger.info("Time steps loaded. nb_time_steps=%s", self.nb_time_steps)
131
234
 
132
-
133
def _exists(self) -> bool:
    """Report whether the hidden table is listed in ``self.schema_name``.

    Listed names are compared case-insensitively after removing any double
    quotes from them.

    Returns:
        True if a listed table matches ``self.table_name``; False otherwise.
    """
    listed_names = tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    exists = any(
        name.lower().replace('"', '') == self.table_name.lower()
        for name in listed_names
    )
    logger.debug("Hidden table %s.%s exists? %s", self.schema_name, self.table_name, exists)
    return exists
140
247
 
141
- return len([x for x in tdml.db_list_tables(schema_name=self.schema_name).TableName.values if
142
- x.lower().replace('"', '') == self.table_name.lower()]) > 0
248
def _drop(self, drop_view: bool = False, force: bool = False) -> None:
    """Drop the hidden table when permitted, and optionally the public view.

    Args:
        drop_view: If True, also issue ``DROP VIEW`` for the public view.
        force: If True, drop the hidden table even when this manager is
            not flagged as its owner.

    Notes:
        - The hidden table is dropped only when it exists and either
          ``self._owns_hidden`` is truthy or ``force`` is True; otherwise a
          warning is logged and the table is left untouched.
        - The view drop is attempted regardless of ownership; a failure is
          logged as a warning and not re-raised.
    """
    # Hidden table: nothing to do, allowed to drop, or refused.
    if not self._exists():
        logger.debug("Hidden table %s.%s does not exist.", self.schema_name, self.table_name)
    elif getattr(self, "_owns_hidden", False) or force:
        logger.info(
            "Dropping hidden table %s.%s (force=%s).",
            self.schema_name, self.table_name, force
        )
        tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
    else:
        logger.warning(
            "Refusing to drop hidden table %s.%s because this manager does not own it. "
            "Use force=True to override.",
            self.schema_name, self.table_name
        )

    # Public view: only on request, best effort.
    if not drop_view:
        return
    try:
        logger.info("Dropping view %s.%s.", self.schema_name, self.view_name)
        tdml.execute_sql(f"DROP VIEW {self.schema_name}.{self.view_name}")
    except Exception as e:
        logger.warning("Error dropping view %s.%s: %s", self.schema_name, self.view_name, e)
152
285
 
153
- def update(self, time_id):
154
- """
155
- Updates the view to apply a new filter based on the provided filter ID.
286
+
287
+ def update(self, time_id: int) -> None:
288
+ """Point the public view at a specific ``TIME_ID``.
156
289
 
157
290
  Args:
158
- filter_id (int): The ID of the filter to apply. The view will be updated to only show data that matches this filter ID.
291
+ time_id: The time step identifier to expose via the public view.
159
292
  """
160
293
  if self._exists():
161
294
  query = f"""
@@ -164,32 +297,44 @@ class TimeManager:
164
297
  FROM {self.schema_name}.{self.table_name}
165
298
  WHERE TIME_ID = {time_id}
166
299
  """
300
+ if getattr(tdfs4ds, "DEBUG_MODE", False):
301
+ logger_safe("debug", "Executing view update:\n%s", query)
167
302
 
168
- if tdfs4ds.DEBUG_MODE:
169
- print(query)
170
303
  tdml.execute_sql(query)
304
+ logger_safe("info", "Updated view %s.%s to TIME_ID=%s.", self.schema_name, self.view_name, time_id)
171
305
 
172
- def display(self):
173
- """
174
- Displays the table.
306
+ else:
307
+ logger_safe(
308
+ "warning",
309
+ "Cannot update view: hidden table %s.%s does not exist.",
310
+ self.schema_name, self.table_name
311
+ )
312
+
313
def display(self) -> pd.DataFrame:
    """Fetch the rows currently exposed by the public view.

    Returns:
        A pandas DataFrame built from ``SEL *`` on the public view, with the
        column names taken from the view's Teradata DataFrame.
    """
    logger.debug("Reading current BUSINESS_DATE from %s.%s.", self.schema_name, self.view_name)
    # Column names come from the teradataml binding; rows from a plain query.
    view_columns = tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name)).columns
    fetched_rows = tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall()
    return pd.DataFrame(fetched_rows, columns=view_columns)
182
325
 
183
- def get_date_in_the_past(self):
184
- """
185
- Retrieves the earliest date and time value from the table.
326
+ def get_date_in_the_past(self) -> str | None:
327
+ """Return the earliest BUSINESS_DATE from the public view as a string.
328
+
329
+ The format includes timezone offset when applicable:
330
+ - ``YYYY-MM-DD HH:MM:SS±HH:MM`` if timezone info is present;
331
+ - otherwise ``YYYY-MM-DD HH:MM:SS``.
186
332
 
187
333
  Returns:
188
- str: The earliest date and time value as a formatted string
189
- ('YYYY-MM-DD HH:MM:SS±HH:MM' if timezone is available, else 'YYYY-MM-DD HH:MM:SS').
334
+ The formatted earliest date/time string, or ``None`` if parsing fails.
190
335
  """
191
- # Use iloc to preserve timezone awareness from pandas
192
336
  date_obj = self.display().BUSINESS_DATE.iloc[0]
337
+ logger.debug("Raw earliest BUSINESS_DATE value read: %r (%s)", date_obj, type(date_obj))
193
338
 
194
339
  if isinstance(date_obj, pd.Timestamp):
195
340
  datetime_obj = date_obj.to_pydatetime()
@@ -200,68 +345,390 @@ class TimeManager:
200
345
  elif isinstance(date_obj, np.datetime64):
201
346
  datetime_obj = pd.to_datetime(date_obj).to_pydatetime()
202
347
  else:
203
- print("temp is of unrecognized type")
204
- print('temp', date_obj)
205
- print('temp type', type(date_obj))
206
- return
348
+ logger.warning("Unrecognized BUSINESS_DATE type: %s; value=%r", type(date_obj), date_obj)
349
+ return None
207
350
 
208
- # Format with timezone offset if available
209
351
  if datetime_obj.tzinfo is not None and datetime_obj.tzinfo.utcoffset(datetime_obj) is not None:
210
352
  output_string = datetime_obj.isoformat(sep=' ', timespec='seconds')
211
353
  else:
212
354
  output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
213
355
 
356
+ logger.debug("Formatted earliest BUSINESS_DATE: %s", output_string)
214
357
  return output_string
215
358
 
216
def get_list_date(self) -> tdml.DataFrame:
    """Expose the hidden table as a Teradata DataFrame.

    Returns:
        A Teradata DataFrame bound to ``self.schema_name.self.table_name``.
    """
    logger.debug("Returning Teradata DataFrame for %s.%s.", self.schema_name, self.table_name)
    hidden_table_ref = tdml.in_schema(self.schema_name, self.table_name)
    return tdml.DataFrame(hidden_table_ref)
226
368
 
227
def generate_timeline(self, schema_name: str, view_name: str, current_included: bool = True) -> tdml.DataFrame:
    """Create a timeline view filtered relative to the current business date.

    The new view (``schema_name.view_name``) selects ``BUSINESS_DATE`` from
    this manager's hidden table (``self.table_name``), keeping the dates up
    to the one currently exposed by the public view (``self.view_name``).

    Args:
        schema_name: Schema where the new timeline view will be created.
        view_name: Name of the new timeline view.
        current_included: If True, include the current business date (``<=``);
            otherwise exclude it (``<``).

    Returns:
        A Teradata DataFrame bound to the newly replaced timeline view.
    """
    logger.info(
        "Generating timeline view %s.%s (current_included=%s).",
        schema_name, view_name, current_included
    )
    comparator = "<=" if current_included else "<"
    # Use self.table_name rather than a hard-coded "<view>_HIDDEN": the
    # backing table may have been resolved from the view DDL or repointed by
    # a clone, and the other methods of this class read self.table_name.
    query = f"""
    REPLACE VIEW {schema_name}.{view_name} AS
    SEL BUSINESS_DATE
    FROM {self.schema_name}.{self.table_name} A
    WHERE BUSINESS_DATE {comparator} (SELECT BUSINESS_DATE FROM {self.schema_name}.{self.view_name})
    """

    tdml.execute_sql(query)
    logger.debug("Replaced timeline view with query:\n%s", query.strip())
    return tdml.DataFrame(tdml.in_schema(schema_name, view_name))
259
402
 
260
def get_current_step(self) -> int | None:
    """Return the TIME_ID corresponding to the current BUSINESS_DATE in the view.

    The hidden table is searched for rows whose ``BUSINESS_DATE`` equals the
    one exposed by the public view.

    Returns:
        The current ``TIME_ID`` if exactly one match is found; otherwise ``None``.
    """
    # Qualify both objects with the schema so the lookup does not depend on
    # the session's default database, as the other queries of this class do.
    hidden_table = f"{self.schema_name}.{self.table_name}"
    public_view = f"{self.schema_name}.{self.view_name}"
    logger.debug("Fetching current TIME_ID from %s and %s.", hidden_table, public_view)
    res = tdml.execute_sql(
        f"SELECT TIME_ID FROM {hidden_table} "
        f"WHERE BUSINESS_DATE = (SELECT BUSINESS_DATE FROM {public_view})"
    ).fetchall()

    if len(res) == 1:
        logger.info("Current TIME_ID resolved to %s.", res[0][0])
        return res[0][0]

    # Zero rows or several rows sharing the date: the step is ambiguous.
    logger.warning("Could not resolve a unique current TIME_ID (rows returned: %s).", len(res))
    return None
422
+
423
def clone_timer(
    self,
    source_timemanager,
    time_id_to_apply: int = 1,
    take_ownership: bool = False,
    clone_mode: str = "soft",
    if_exists: str = "error",
) -> "TimeManager":
    """
    Clone time-step definitions from another TimeManager.

    Supports:
    - soft clone (default): point this manager's view to the source hidden table
    - hard clone: copy the source hidden table into this schema and own the copy

    Args:
        source_timemanager (TimeManager): Source manager to clone from.
        time_id_to_apply (int, optional): TIME_ID to activate in the public view. Default: 1.
        take_ownership (bool, optional): For soft clones only, whether this
            manager should consider itself the owner of the hidden table.
            (Hard clones always own their copy.) Default: False.
        clone_mode (str, optional): "soft" or "hard". Default: "soft".
        if_exists (str, optional): What to do if the destination hidden table already exists
            (hard clones only):
            - "error" (default): raise an exception
            - "replace": drop and recreate
            - "skip": reuse existing table

    Returns:
        TimeManager: self

    Raises:
        ValueError: On invalid clone_mode/if_exists, missing source table, or
            when "replace" would drop the very table being cloned.
        RuntimeError: If destination exists and if_exists="error".
    """
    if clone_mode not in ("soft", "hard"):
        raise ValueError("clone_mode must be 'soft' or 'hard'")
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    # Convert early so a bad value fails before any table is dropped or created.
    active_time_id = int(time_id_to_apply)

    src_schema = source_timemanager.schema_name
    src_hidden = source_timemanager.table_name

    logger.info(
        "Cloning timer",
        extra={
            "mode": clone_mode,
            "source": f"{src_schema}.{src_hidden}",
            "target_view": f"{self.schema_name}.{self.view_name}",
        },
    )

    def _listed_tables(schema):
        # Lower-cased, quote-free names: same normalisation as _exists().
        return {
            t.lower().replace('"', '')
            for t in tdml.db_list_tables(schema_name=schema).TableName.values
        }

    # Validate source existence
    if src_hidden.lower() not in _listed_tables(src_schema):
        raise ValueError(f"Source hidden timer table {src_schema}.{src_hidden} does not exist.")

    if clone_mode == "hard":
        # Hard clone -> create (or reuse) a hidden table in this schema.
        # The name stays local until the copy is settled so a failure does
        # not leave self.table_name pointing at a table that was never made.
        dest_table = get_hidden_table_name(schema_name=self.schema_name, view_name=self.view_name)
        dest_exists = dest_table.lower() in _listed_tables(self.schema_name)
        need_create = True

        if dest_exists:
            if if_exists == "error":
                raise RuntimeError(f"Target table {self.schema_name}.{dest_table} already exists.")
            if if_exists == "replace":
                same_table = (
                    str(src_schema).lower() == str(self.schema_name).lower()
                    and src_hidden.lower() == dest_table.lower()
                )
                if same_table:
                    # Dropping the destination would destroy the source
                    # before it can be copied.
                    raise ValueError(
                        f"Cannot replace {self.schema_name}.{dest_table}: it is also the clone source."
                    )
                logger.warning("Replacing existing table %s.%s", self.schema_name, dest_table)
                tdml.db_drop_table(schema_name=self.schema_name, table_name=dest_table)
            else:  # "skip"
                logger.info("Skipping clone, using existing %s.%s", self.schema_name, dest_table)
                need_create = False

        if need_create:
            logger.info("Creating cloned table %s.%s", self.schema_name, dest_table)
            create_sql = f"""
            CREATE TABLE {self.schema_name}.{dest_table} AS
            (SELECT * FROM {src_schema}.{src_hidden})
            WITH DATA
            """
            tdml.execute_sql(create_sql)

        self.table_name = dest_table
        self._owns_hidden = True
        target_schema = self.schema_name

    else:
        # Soft clone -> just point to the source hidden table
        logger.info("Soft clone: linking view to source hidden table")
        self.table_name = src_hidden
        self._owns_hidden = bool(take_ownership)
        target_schema = src_schema  # view will select from the source schema

    # Load metadata from the target hidden table
    df_meta = tdml.DataFrame(tdml.in_schema(target_schema, self.table_name))
    # Get data type for BUSINESS_DATE (if present)
    try:
        dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
        self.data_type = dtypes.get("BUSINESS_DATE")
    except Exception:
        self.data_type = None

    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {target_schema}.{self.table_name}"
    ).fetchall()[0][0]

    # Rebuild the public view to the requested TIME_ID
    view_sql = f"""
    REPLACE VIEW {self.schema_name}.{self.view_name} AS
    SELECT BUSINESS_DATE
    FROM {target_schema}.{self.table_name}
    WHERE TIME_ID = {active_time_id}
    """
    tdml.execute_sql(view_sql)

    logger.info(
        "Timer clone complete → Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        time_id_to_apply, self.nb_time_steps, self.data_type
    )
    return self
539
+
540
def take_ownership(
    self,
    create_copy: bool = True,
    if_exists: str = "error",
) -> "TimeManager":
    """Promote this manager to OWN the hidden table.

    Two modes:
    - create_copy=True (default): copy the current hidden table into this
      manager's schema under the name returned by ``get_hidden_table_name``,
      repoint the view at the copy, and set ownership.
    - create_copy=False: only mark as owned if the current hidden table name
      already equals that destination name. Otherwise, warn and do nothing.

    Args:
        create_copy: If True, copy data into this schema and repoint view.
        if_exists: Behavior when the destination table already exists:
            - "error" (default): raise
            - "replace": drop & recreate (the table is kept as-is when it is
              the current hidden table itself, since it would otherwise be
              dropped before it could be copied)
            - "skip": reuse existing

    Returns:
        TimeManager: self

    Raises:
        ValueError: If ``if_exists`` is not one of the accepted values.
        RuntimeError: If the destination exists and ``if_exists="error"``.
    """
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    # Figure out current active TIME_ID to preserve selection after repointing
    try:
        current_time_id = self.get_current_step()
    except Exception:
        current_time_id = None
    if current_time_id is None:
        current_time_id = 1

    dest_table = get_hidden_table_name(schema_name=self.schema_name, view_name=self.view_name)

    if not create_copy:
        # Only mark as owned if we already match the destination name
        if self.schema_name and self.table_name == dest_table:
            logger.info(
                "Marking existing hidden table %s.%s as owned (no copy).",
                self.schema_name, self.table_name
            )
            self._owns_hidden = True
            return self
        logger.warning(
            "Cannot take ownership without copying: current table is %s (expected %s). "
            "Re-run with create_copy=True to copy into %s.%s.",
            self.table_name, dest_table, self.schema_name, dest_table
        )
        return self

    # We will copy data into <self.schema_name>.<dest_table>
    dest_exists = [
        t.lower().replace('"', '')
        for t in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    ]
    need_create = True

    # The copy below always reads from self.schema_name, so source and
    # destination are the same table whenever the names match.
    same_table = dest_table.lower() == self.table_name.lower()

    if dest_table.lower() in dest_exists:
        if if_exists == "error":
            raise RuntimeError(f"Destination table {self.schema_name}.{dest_table} already exists.")
        elif if_exists == "replace":
            if same_table:
                # Dropping it would destroy the data we are about to copy.
                logger.warning(
                    "Destination %s.%s is the current hidden table; keeping it instead of replacing it.",
                    self.schema_name, dest_table
                )
                need_create = False
            else:
                logger.warning("Replacing existing table %s.%s", self.schema_name, dest_table)
                tdml.db_drop_table(schema_name=self.schema_name, table_name=dest_table)
        elif if_exists == "skip":
            logger.info("Reusing existing destination table %s.%s", self.schema_name, dest_table)
            need_create = False

    if need_create:
        logger.info(
            "Creating owned copy %s.%s from current source %s.%s",
            self.schema_name, dest_table, self.schema_name, self.table_name
        )
        # NOTE(review): the source is read from self.schema_name; a hidden
        # table that lives in another schema (e.g. after a soft clone) is not
        # tracked on the instance and cannot be located from here.
        create_sql = f"""
        CREATE TABLE {self.schema_name}.{dest_table} AS
        (SELECT * FROM {self.schema_name}.{self.table_name})
        WITH DATA
        """
        tdml.execute_sql(create_sql)

    # Repoint this manager to the new owned table and rebuild the view
    self.table_name = dest_table
    self._owns_hidden = True

    view_sql = f"""
    REPLACE VIEW {self.schema_name}.{self.view_name} AS
    SELECT BUSINESS_DATE
    FROM {self.schema_name}.{self.table_name}
    WHERE TIME_ID = {int(current_time_id)}
    """
    tdml.execute_sql(view_sql)

    # Refresh metadata (best effort for the data type: keep the previous
    # value if inspection fails, but leave a trace in the log).
    try:
        df_meta = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
        dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
        self.data_type = dtypes.get("BUSINESS_DATE")
    except Exception as e:
        logger.debug("Could not refresh BUSINESS_DATE data type: %s", e)
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    logger.info(
        "Ownership taken for %s.%s. Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        self.schema_name, self.table_name, current_time_id, self.nb_time_steps, self.data_type
    )
    return self
658
+
659
def get_current_timeid(self) -> int:
    """
    Extract the currently active TIME_ID from the public view's DDL.

    Returns:
        int: TIME_ID parsed from the view definition.

    Raises:
        ValueError: If the TIME_ID cannot be parsed from the DDL.
    """
    logger.debug("Reading view DDL to extract current TIME_ID")
    txt = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)

    # Look for "WHERE TIME_ID = <number>" (allow whitespace/case variations)
    m = re.search(r"WHERE\s+TIME_ID\s*=\s*(\d+)", txt, flags=re.IGNORECASE)
    if not m:
        # No exception is being handled here, so logger.exception would only
        # append a meaningless "NoneType: None" traceback; log a plain error.
        logger.error("Failed to parse TIME_ID from view DDL")
        raise ValueError("Unable to parse current TIME_ID from view DDL.")
    current = int(m.group(1))
    logger.info("Current TIME_ID extracted", extra={"time_id": current})
    return current
680
+
681
+
682
def print_view_ddl(self) -> None:
    """
    Fetch the public view's DDL and write it to the log at INFO level.
    """
    view_definition = tdfs4ds.utils.lineage.get_ddl(
        schema_name=self.schema_name,
        view_name=self.view_name,
    )
    logger.info("View DDL:\n%s", view_definition)
688
+
689
+
690
+ def prune_time(self, time_id: int | None = None):
691
+ """
692
+ Remove all time steps with TIME_ID lower than `time_id` and renumber remaining ones.
693
+
694
+ If `time_id` is omitted, the method uses the current TIME_ID from the view.
695
+ After pruning, TIME_ID values are normalized so the smallest remaining id becomes 1,
696
+ and the public view is repointed to TIME_ID=1.
697
+
698
+ Args:
699
+ time_id (int, optional): Threshold id; rows with TIME_ID < time_id are deleted.
700
+
701
+ Returns:
702
+ TimeManager: Self, to allow method chaining.
703
+ """
704
+ if time_id is None:
705
+ time_id = self.get_current_timeid()
706
+
707
+ logger.info("Pruning time steps", extra={"threshold_time_id": time_id})
708
+
709
+ delete_sql = f"""
710
+ DELETE {self.schema_name}.{self.table_name}
711
+ WHERE TIME_ID < {int(time_id)}
712
+ """
713
+ update_sql = f"""
714
+ UPDATE {self.schema_name}.{self.table_name}
715
+ SET TIME_ID = TIME_ID - {int(time_id)} + 1
716
+ """
717
+
718
+ logger.debug("Executing prune delete", extra={"sql": delete_sql})
719
+ tdml.execute_sql(delete_sql)
720
+
721
+ logger.debug("Executing prune renumber", extra={"sql": update_sql})
722
+ tdml.execute_sql(update_sql)
723
+
724
+ # Refresh metadata and repoint view to TIME_ID=1
725
+ self.update(1)
726
+ self.nb_time_steps = tdml.execute_sql(
727
+ f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
728
+ ).fetchall()[0][0]
729
+
730
+ logger.info(
731
+ "Prune complete; active TIME_ID set to 1; nb_time_steps=%s",
732
+ self.nb_time_steps
733
+ )
734
+ return self